svn commit: r1366844 - in /nutch/branches/2.x: CHANGES.txt build.xml

2012-07-29 Thread lewismc
Author: lewismc
Date: Sun Jul 29 13:03:43 2012
New Revision: 1366844

URL: http://svn.apache.org/viewvc?rev=1366844&view=rev
Log:
NUTCH-1376 Add description parameter to every ant task

Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/build.xml

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1366844&r1=1366843&r2=1366844&view=diff
==
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sun Jul 29 13:03:43 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.1 - Current Development
 
+* NUTCH-1376 add ant description parameters (lewismc)
+
 * NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml 
(shekhar sharma via lewismc)
 
 * NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar 
sharma via lewismc)

Modified: nutch/branches/2.x/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/build.xml?rev=1366844&r1=1366843&r2=1366844&view=diff
==
--- nutch/branches/2.x/build.xml (original)
+++ nutch/branches/2.x/build.xml Sun Jul 29 13:03:43 2012
@@ -61,7 +61,7 @@
  !-- == --
  !-- Stuff needed by all targets --
  !-- == --
- <target name="init" depends="ivy-init">
+ <target name="init" depends="ivy-init" description="--> stuff required by all 
targets">
   mkdir dir=${build.dir} /
   mkdir dir=${build.classes} /
   mkdir dir=${release.dir} /
@@ -82,9 +82,9 @@
  !-- == --
  !-- Compile the Java files --
  !-- == --
- target name=compile depends=compile-core, compile-plugins /
+ target name=compile depends=compile-core, compile-plugins 
description=-- compile all Java files/
 
- target name=compile-core depends=init, resolve-default
+ target name=compile-core depends=init, resolve-default description=-- 
compile core Java files only
   javac 
encoding=${build.encoding} 
srcdir=${src.dir}
@@ -100,7 +100,7 @@
   /javac
  /target
 
- target name=compile-plugins depends=init, resolve-default
+ target name=compile-plugins depends=init, resolve-default 
description=-- compile plugins only
   ant dir=src/plugin target=deploy inheritAll=false /
  /target
 
@@ -109,7 +109,7 @@
  !-- == --
  !-- --
  !-- == --
- target name=jar depends=compile-core
+ target name=jar depends=compile-core description=-- make nutch.jar
   copy file=${conf.dir}/nutch-default.xml todir=${build.classes} /
   copy file=${conf.dir}/nutch-site.xml todir=${build.classes} /
   jar jarfile=${build.dir}/${final.name}.jar basedir=${build.classes}
@@ -262,7 +262,7 @@
  !-- == --
  !-- --
  !-- == --
- target name=job depends=compile
+ target name=job depends=compile description=-- make nutch.job jar
   jar jarfile=${build.dir}/${final.name}.job
!-- 
If the build.classes has the nutch config files because the jar command 
@@ -280,7 +280,7 @@
   /jar
  /target
 
- target name=runtime depends=jar, job
+ target name=runtime depends=jar, job description=-- default target for 
running Nutch
   mkdir dir=${runtime.dir} /
   mkdir dir=${runtime.local} /
   mkdir dir=${runtime.deploy} /
@@ -321,7 +321,7 @@
  !-- == --
  !-- Compile test code --
  !-- == --
- target name=compile-core-test depends=compile-core, resolve-test
+ target name=compile-core-test depends=compile-core, resolve-test 
description=-- compile test code
   javac 
encoding=${build.encoding} 
srcdir=${test.src.dir}
@@ -341,7 +341,7 @@
  !-- Run Nutch proxy --
  !-- == --
 
- target name=proxy depends=job, compile-core-test
+ target name=proxy depends=job, compile-core-test description=-- run 
nutch proxy
   java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true
classpath refid=test.classpath /
arg value=-fake /
@@ -357,7 +357,7 @@
  !-- Run Nutch benchmarking analysis --
  !-- == --
 
- target name=benchmark
+ target name=benchmark description=-- run nutch benchmarking analysis
   java classname=org.apache.nutch.tools.Benchmark fork=true
classpath refid=test.classpath /
jvmarg line=-Xmx512m 
-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl
 /
@@ -374,9 +374,9

svn commit: r1366847 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/metadata/Metadata.java src/java/org/apache/nutch/metadata/Office.java

2012-07-29 Thread lewismc
Author: lewismc
Date: Sun Jul 29 13:13:25 2012
New Revision: 1366847

URL: http://svn.apache.org/viewvc?rev=1366847&view=rev
Log:
NUTCH-1416 Remove o.a.n.metadata.Office

Removed:
nutch/trunk/src/java/org/apache/nutch/metadata/Office.java
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366847&r1=1366846&r2=1366847&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sun Jul 29 13:13:25 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1417 Remove o.a.n.metadata.Office (lewismc)
+
 * NUTCH-1376 Add description parameter to every ant task (lewismc)
 
 * NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml 
(shekhar sharma via lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?rev=1366847&r1=1366846&r2=1366847&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Sun Jul 29 
13:13:25 2012
@@ -36,7 +36,7 @@ import org.apache.hadoop.io.Writable;
  *
  */
 public class Metadata implements Writable, CreativeCommons,
-DublinCore, HttpHeaders, Nutch, Office, Feed {
+DublinCore, HttpHeaders, Nutch, Feed {
 
   /**
* A map of all metadata attributes.




svn commit: r1366342 - in /nutch/trunk: CHANGES.txt conf/schema-solr4.xml

2012-07-27 Thread lewismc
Author: lewismc
Date: Fri Jul 27 11:38:07 2012
New Revision: 1366342

URL: http://svn.apache.org/viewvc?rev=1366342&view=rev
Log:
NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/schema-solr4.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366342&r1=1366341&r2=1366342&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jul 27 11:38:07 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml 
(shekhar sharma via lewismc)
+
 * NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar 
sharma via lewismc)
 
 * NUTCH-1433 Upgrade to Tika 1.2 (jnioche)

Modified: nutch/trunk/conf/schema-solr4.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1366342&r1=1366341&r2=1366342&view=diff
==
--- nutch/trunk/conf/schema-solr4.xml (original)
+++ nutch/trunk/conf/schema-solr4.xml Fri Jul 27 11:38:07 2012
@@ -120,7 +120,7 @@
 
 !-- A text field with defaults appropriate for English: it
  tokenizes with StandardTokenizer, removes English stop words
- (stopwords_en.txt), down cases, protects words from protwords.txt, and
+ (stopwords.txt), down cases, protects words from protwords.txt, and
  finally applies Porter's stemming.  The query time analyzer
  also applies synonyms from synonyms.txt. --
 fieldType name=text_en class=solr.TextField 
positionIncrementGap=100
@@ -135,7 +135,7 @@
 --
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.LowerCaseFilterFactory/
@@ -151,7 +151,7 @@
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=true/
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.LowerCaseFilterFactory/
@@ -188,7 +188,7 @@
 --
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.WordDelimiterFilterFactory generateWordParts=1 
generateNumberParts=1 catenateWords=1 catenateNumbers=1 catenateAll=0 
splitOnCaseChange=1/
@@ -201,7 +201,7 @@
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=true/
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.WordDelimiterFilterFactory generateWordParts=1 
generateNumberParts=1 catenateWords=0 catenateNumbers=0 catenateAll=0 
splitOnCaseChange=1/
@@ -217,7 +217,7 @@
   analyzer
 tokenizer class=solr.WhitespaceTokenizerFactory/
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=false/
-filter class=solr.StopFilterFactory ignoreCase=true 
words=stopwords_en.txt/
+filter class=solr.StopFilterFactory ignoreCase=true 
words=stopwords.txt/
 filter class=solr.WordDelimiterFilterFactory generateWordParts=0 
generateNumberParts=0 catenateWords=1 catenateNumbers=1 catenateAll=0/
 filter class=solr.LowerCaseFilterFactory/
 filter class=solr.KeywordMarkerFilterFactory 
protected=protwords.txt/




svn commit: r1366348 - in /nutch/branches/2.x: CHANGES.txt conf/schema-solr4.xml

2012-07-27 Thread lewismc
Author: lewismc
Date: Fri Jul 27 11:55:22 2012
New Revision: 1366348

URL: http://svn.apache.org/viewvc?rev=1366348&view=rev
Log:
NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml

Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/schema-solr4.xml

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1366348&r1=1366347&r2=1366348&view=diff
==
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jul 27 11:55:22 2012
@@ -1,6 +1,11 @@
 Nutch Change Log
 
 Release 2.1 - Current Development
+
+* NUTCH-1440 reconfigure non-existent stopwords_en.txt in schema-solr4.xml 
(shekhar sharma via lewismc)
+
+* NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar 
sharma via lewismc)
+
 * NUTCH-1438 ParserJob support for option -reparse (ferdy)
 
 * NUTCH-1437 HostInjectorJob to accept lines with or without protocol (ferdy)

Modified: nutch/branches/2.x/conf/schema-solr4.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema-solr4.xml?rev=1366348&r1=1366347&r2=1366348&view=diff
==
--- nutch/branches/2.x/conf/schema-solr4.xml (original)
+++ nutch/branches/2.x/conf/schema-solr4.xml Fri Jul 27 11:55:22 2012
@@ -25,7 +25,7 @@
 for more info.
 --
 
-schema name=nutch version=1.6-SNAPSHOT
+schema name=nutch version=2.1-SNAPSHOT
 
   types
 
@@ -120,7 +120,7 @@
 
 !-- A text field with defaults appropriate for English: it
  tokenizes with StandardTokenizer, removes English stop words
- (stopwords_en.txt), down cases, protects words from protwords.txt, and
+ (stopwords.txt), down cases, protects words from protwords.txt, and
  finally applies Porter's stemming.  The query time analyzer
  also applies synonyms from synonyms.txt. --
 fieldType name=text_en class=solr.TextField 
positionIncrementGap=100
@@ -135,7 +135,7 @@
 --
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.LowerCaseFilterFactory/
@@ -151,7 +151,7 @@
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=true/
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.LowerCaseFilterFactory/
@@ -188,7 +188,7 @@
 --
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.WordDelimiterFilterFactory generateWordParts=1 
generateNumberParts=1 catenateWords=1 catenateNumbers=1 catenateAll=0 
splitOnCaseChange=1/
@@ -201,7 +201,7 @@
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=true/
 filter class=solr.StopFilterFactory
 ignoreCase=true
-words=stopwords_en.txt
+words=stopwords.txt
 enablePositionIncrements=true
 /
 filter class=solr.WordDelimiterFilterFactory generateWordParts=1 
generateNumberParts=1 catenateWords=0 catenateNumbers=0 catenateAll=0 
splitOnCaseChange=1/
@@ -217,7 +217,7 @@
   analyzer
 tokenizer class=solr.WhitespaceTokenizerFactory/
 filter class=solr.SynonymFilterFactory synonyms=synonyms.txt 
ignoreCase=true expand=false/
-filter class=solr.StopFilterFactory ignoreCase=true 
words=stopwords_en.txt/
+filter class=solr.StopFilterFactory ignoreCase=true 
words=stopwords.txt/
 filter class=solr.WordDelimiterFilterFactory generateWordParts=0 
generateNumberParts=0 catenateWords=1 catenateNumbers=1 catenateAll=0/
 filter class=solr.LowerCaseFilterFactory/
 filter class=solr.KeywordMarkerFilterFactory 
protected=protwords.txt/




svn commit: r1365973 - /nutch/branches/2.x/doap.rdf

2012-07-26 Thread lewismc
Author: lewismc
Date: Thu Jul 26 13:03:54 2012
New Revision: 1365973

URL: http://svn.apache.org/viewvc?rev=1365973&view=rev
Log:
remove unnecessary doap.rdf

Removed:
nutch/branches/2.x/doap.rdf



svn commit: r1365972 - /nutch/site/publish/doap.rdf

2012-07-26 Thread lewismc
Author: lewismc
Date: Thu Jul 26 13:00:46 2012
New Revision: 1365972

URL: http://svn.apache.org/viewvc?rev=1365972&view=rev
Log:
trivial commit to update doap.rdf

Modified:
nutch/site/publish/doap.rdf

Modified: nutch/site/publish/doap.rdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/doap.rdf?rev=1365972&r1=1365971&r2=1365972&view=diff
==
--- nutch/site/publish/doap.rdf (original)
+++ nutch/site/publish/doap.rdf Thu Jul 26 13:00:46 2012
@@ -40,6 +40,20 @@ The system can be enhanced (eg other doc
 category rdf:resource=http://projects.apache.org/category/web-framework; 
/
 release
   Version
+nameApache Nutch 1.5.1/name
+created2012-07-10/created
+revision1.5.1/revision
+  /Version
+/release
+release
+  Version
+nameApache Nutch 2.0/name
+created2012-07-07/created
+revision2.0/revision
+  /Version
+/release
+release
+  Version
 nameApache Nutch 1.5/name
 created2012-06-07/created
 revision1.5/revision




svn commit: r1366159 - in /nutch/trunk: CHANGES.txt conf/schema-solr4.xml

2012-07-26 Thread lewismc
Author: lewismc
Date: Thu Jul 26 19:20:44 2012
New Revision: 1366159

URL: http://svn.apache.org/viewvc?rev=1366159&view=rev
Log:
NUTCH-1439 Define boost field as type float in schema-solr4.xml

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/schema-solr4.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1366159&r1=1366158&r2=1366159&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jul 26 19:20:44 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1439 Define boost field as type float in schema-solr4.xml (shekhar 
sharma via lewismc)
+
 * NUTCH-1433 Upgrade to Tika 1.2 (jnioche)
 
 * NUTCH-1388 Optionally maintain custom fetch interval despite 
AdaptiveFetchSchedule (markus)

Modified: nutch/trunk/conf/schema-solr4.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1366159&r1=1366158&r2=1366159&view=diff
==
--- nutch/trunk/conf/schema-solr4.xml (original)
+++ nutch/trunk/conf/schema-solr4.xml Thu Jul 26 19:20:44 2012
@@ -306,7 +306,7 @@
 <!-- core fields -->
 <field name="segment" type="string" stored="true" indexed="false"/>
 <field name="digest" type="string" stored="true" indexed="false"/>
-<field name="boost" type="string" stored="true" indexed="false"/>
+<field name="boost" type="float" stored="true" indexed="false"/>
 
 <!-- fields for index-basic plugin -->
 <field name="host" type="url" stored="false" indexed="true"/>




svn commit: r1366170 - /nutch/branches/2.x/conf/schema-solr4.xml

2012-07-26 Thread lewismc
Author: lewismc
Date: Thu Jul 26 19:37:35 2012
New Revision: 1366170

URL: http://svn.apache.org/viewvc?rev=1366170&view=rev
Log:
copy over solr 4 schema.

Added:
nutch/branches/2.x/conf/schema-solr4.xml
  - copied unchanged from r1366169, nutch/trunk/conf/schema-solr4.xml



svn commit: r1364584 - in /nutch/branches/2.x: CHANGES.txt conf/gora-cassandra-mapping.xml conf/gora-sql-mapping.xml

2012-07-23 Thread lewismc
Author: lewismc
Date: Mon Jul 23 11:11:59 2012
New Revision: 1364584

URL: http://svn.apache.org/viewvc?rev=1364584&view=rev
Log:
NUTCH-1435 Host jobs throw NullPointerException with MySQL

Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/gora-cassandra-mapping.xml
nutch/branches/2.x/conf/gora-sql-mapping.xml

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1364584&r1=1364583&r2=1364584&view=diff
==
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Jul 23 11:11:59 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.1 - Current Development
 
+* NUTCH-1435 Host jobs throw NullPointerException with MySQL (ferdy via 
lewismc)
+
 * NUTCH-1428 GeneratorMapper should not initialize filters/normalizers when 
they are disabled (ferdy)
 
 * NUTCH-1427 Reuse SelectorEntry in Generator. (ferdy)

Modified: nutch/branches/2.x/conf/gora-cassandra-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/gora-cassandra-mapping.xml?rev=1364584&r1=1364583&r2=1364584&view=diff
==
--- nutch/branches/2.x/conf/gora-cassandra-mapping.xml (original)
+++ nutch/branches/2.x/conf/gora-cassandra-mapping.xml Mon Jul 23 11:11:59 2012
@@ -46,11 +46,11 @@
 field name=score family=f qualifier=s/
 
 !-- super columns --
-field name=markers family=sc qualifier=mk/
+field name=headers family=sc qualifier=h/
 field name=inlinks family=sc qualifier=il/
 field name=outlinks family=sc qualifier=ol/
 field name=metadata family=sc qualifier=mtdt/
-field name=headers family=sc qualifier=h/
+field name=markers family=sc qualifier=mk/
 field name=parseStatus family=sc qualifier=pas/
 field name=protocolStatus family=sc qualifier=prs/
 /class

Modified: nutch/branches/2.x/conf/gora-sql-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/gora-sql-mapping.xml?rev=1364584&r1=1364583&r2=1364584&view=diff
==
--- nutch/branches/2.x/conf/gora-sql-mapping.xml (original)
+++ nutch/branches/2.x/conf/gora-sql-mapping.xml Mon Jul 23 11:11:59 2012
@@ -47,4 +47,12 @@
 field name=markers column=markers/
 /class
 
+class name=org.apache.nutch.storage.Host keyClass=java.lang.String
+table=host
+  primarykey column=id length=512/
+  field name=metadata column=metadata/
+  field name=inlinks column=inlinks/
+  field name=outlinks column=outlinks/
+/class
+
 /gora-orm




svn commit: r1359704 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/

2012-07-10 Thread lewismc
Author: lewismc
Date: Tue Jul 10 14:39:10 2012
New Revision: 1359704

URL: http://svn.apache.org/viewvc?rev=1359704&view=rev
Log:
Nutch v1.5.1 announcement

Modified:
nutch/site/forrest/src/documentation/content/xdocs/index.xml
nutch/site/forrest/src/documentation/content/xdocs/site.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/faq.html
nutch/site/publish/faq.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/old_downloads.html
nutch/site/publish/old_downloads.pdf
nutch/site/publish/sonar.html
nutch/site/publish/sonar.pdf
nutch/site/publish/tutorial.html
nutch/site/publish/tutorial.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf
nutch/site/publish/wiki.html
nutch/site/publish/wiki.pdf

Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1359704&r1=1359703&r2=1359704&view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Jul 10 
14:39:10 2012
@@ -30,6 +30,15 @@
   section
 
   titleApache Nutch News/title
+  
+   section
+ title10 July 2012 - Apache Nutch v1.5.1 Released/title
+ pThe Apache Nutch PMC are very pleased to announce the release of 
Apache Nutch v1.5.1. This release is a maintainence release of the popular 
1.5.X mainstream version of Nutch which has been widely adopted within the 
community. 
+ Please see the a 
href=http://www.apache.org/dist/nutch/1.5.1/CHANGES.txt;list of changes/a 
made 
+ in this version for a full breakdown. The release is available
+   a href=http://www.apache.org/dyn/closer.cgi/nutch/;here/a.
+/p
+   /section   
 
section
  title07 July 2012 - Apache Nutch v2.0 Released/title

Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1359704&r1=1359703&r2=1359704&view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Jul 10 
14:39:10 2012
@@ -47,7 +47,7 @@ See http://forrest.apache.org/docs/linki
 tutoriallabel=Tutorial href=tutorial.html /
 webmasters  label=Robothref=bot.html /
 apidocs label=API Docs (2.0)href=apidocs-2.0/index.html/
-apidocs label=API Docs (1.5)href=apidocs-1.5/index.html/
+apidocs label=API Docs (1.5.1)href=apidocs-1.5/index.html/
 apidocs label=API Docs (trunk-nightly)  href=ext:nightly-api 
/
 apidocslabel=API Docs (2.0-Dev-nightly) href=ext:nightly-2.0-api 
/
   /docs

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1359704&r1=1359703&r2=1359704&view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Tue Jul 10 14:39:10 2012
@@ -186,7 +186,7 @@ document.write(Last Published:  + docu
 a href=apidocs-2.0/index.htmlAPI Docs (2.0)/a
 /div
 div class=menuitem
-a href=apidocs-1.5/index.htmlAPI Docs (1.5)/a
+a href=apidocs-1.5/index.htmlAPI Docs (1.5.1)/a
 /div
 div class=menuitem
 a href=https://builds.apache.org/job/Nutch-trunk/javadoc/;API Docs 
(trunk-nightly)/a

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1359704&r1=1359703&r2=1359704&view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1359704&r1=1359703&r2=1359704&view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Tue Jul 10 14:39:10 2012
@@ -186,7 +186,7 @@ document.write(Last Published:  + docu
 a href=apidocs-2.0/index.htmlAPI Docs (2.0)/a
 /div
 div class=menuitem
-a href=apidocs-1.5/index.htmlAPI Docs (1.5)/a
+a href=apidocs-1.5/index.htmlAPI Docs (1.5.1)/a
 /div
 div class=menuitem

svn commit: r1359746 - in /nutch/branches/2.x: conf/nutch-default.xml conf/schema.xml default.properties

2012-07-10 Thread lewismc
Author: lewismc
Date: Tue Jul 10 16:08:23 2012
New Revision: 1359746

URL: http://svn.apache.org/viewvc?rev=1359746&view=rev
Log:
update all versions to 2.1-SNAPSHOT

Modified:
nutch/branches/2.x/conf/nutch-default.xml
nutch/branches/2.x/conf/schema.xml
nutch/branches/2.x/default.properties

Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1359746&r1=1359745&r2=1359746&view=diff
==
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Tue Jul 10 16:08:23 2012
@@ -125,7 +125,7 @@
 
 <property>
   <name>http.agent.version</name>
-  <value>Nutch-2.0</value>
+  <value>Nutch-2.1-SNAPSHOT</value>
   <description>A version string to advertise in the User-Agent 
header.</description>
 </property>

Modified: nutch/branches/2.x/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema.xml?rev=1359746&r1=1359745&r2=1359746&view=diff
==
--- nutch/branches/2.x/conf/schema.xml (original)
+++ nutch/branches/2.x/conf/schema.xml Tue Jul 10 16:08:23 2012
@@ -27,7 +27,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=2.0
+schema name=nutch version=2.1-SNAPSHOT
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 

Modified: nutch/branches/2.x/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/default.properties?rev=1359746&r1=1359745&r2=1359746&view=diff
==
--- nutch/branches/2.x/default.properties (original)
+++ nutch/branches/2.x/default.properties Tue Jul 10 16:08:23 2012
@@ -15,7 +15,7 @@
 
 
 name=apache-nutch
-version=2.0
+version=2.1-SNAPSHOT
 final.name=${name}-${version}
 year=2012
 




svn commit: r1359752 - in /nutch/trunk: conf/schema-solr4.xml conf/schema.xml default.properties

2012-07-10 Thread lewismc
Author: lewismc
Date: Tue Jul 10 16:15:24 2012
New Revision: 1359752

URL: http://svn.apache.org/viewvc?rev=1359752&view=rev
Log:
update all versions to 1.6-SNAPSHOT

Modified:
nutch/trunk/conf/schema-solr4.xml
nutch/trunk/conf/schema.xml
nutch/trunk/default.properties

Modified: nutch/trunk/conf/schema-solr4.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/schema-solr4.xml?rev=1359752&r1=1359751&r2=1359752&view=diff
==
--- nutch/trunk/conf/schema-solr4.xml (original)
+++ nutch/trunk/conf/schema-solr4.xml Tue Jul 10 16:15:24 2012
@@ -25,7 +25,7 @@
 for more info.
 --
 
-schema name=nutch version=1.4
+schema name=nutch version=1.6-SNAPSHOT
 
   types
 

Modified: nutch/trunk/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/schema.xml?rev=1359752&r1=1359751&r2=1359752&view=diff
==
--- nutch/trunk/conf/schema.xml (original)
+++ nutch/trunk/conf/schema.xml Tue Jul 10 16:15:24 2012
@@ -28,7 +28,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=1.6
+schema name=nutch version=1.6-SNAPSHOT
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1359752&r1=1359751&r2=1359752&view=diff
==
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Tue Jul 10 16:15:24 2012
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 name=apache-nutch
-version=1.5.1-SNAPSHOT
+version=1.6-SNAPSHOT
 final.name=${name}-${version}
 year=2012
 




svn commit: r1359760 - in /nutch/trunk: ./ conf/ src/java/org/apache/nutch/metadata/ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/ src/plugin/protocol-http/src/java/org/apache/nutch

2012-07-10 Thread lewismc
Author: lewismc
Date: Tue Jul 10 16:29:11 2012
New Revision: 1359760

URL: http://svn.apache.org/viewvc?rev=1359760&view=rev
Log:
revert NUTCH-1360

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java

nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1359760&r1=1359759&r2=1359760&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jul 10 16:29:11 2012
@@ -32,8 +32,6 @@ Nutch Change Log
 
 * NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
 
-* NUTCH-1360 Suport the storing of IP address connected to when web crawling 
(lewismc)
-
 * NUTCH-1262 Map `duplicating` content-types to a single type (markus)
 
 * NUTCH-1385 More robust plug-in order properties in nutch-site.xml (Andy Xue 
via markus)

Modified: nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1359760&r1=1359759&r2=1359760&view=diff
==
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Tue Jul 10 16:29:11 2012
@@ -255,13 +255,6 @@
   /description
 /property
 
-<property>
-  <name>http.store.ip.address</name>
-  <value>false</value>
-  <description>Enables us to capture the specific IP address of the 
-  host which we connect to to fetch a page.</description>
-</property>
-
 !-- FTP properties --
 
 property

Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Tue Jul 10 
16:29:11 2012
@@ -48,7 +48,5 @@ public interface HttpHeaders {
   public final static String LAST_MODIFIED = "Last-Modified";
   
   public final static String LOCATION = "Location";
-  
-  public final static String IP_ADDRESS = "_ip";
 
 }

Modified: 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==
--- 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
 (original)
+++ 
nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
 Tue Jul 10 16:29:11 2012
@@ -80,9 +80,6 @@ public abstract class HttpBase implement
   /** The Accept request header value. */
   protected String accept = 
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
   
-  /** The _ip request header value. */
-  protected boolean ip_header = false;
-
   /** The default logger */
   private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);
 
@@ -123,7 +120,6 @@ public abstract class HttpBase implement
   .get("http.agent.description"), conf.get("http.agent.url"), 
conf.get("http.agent.email"));
   this.acceptLanguage = conf.get("http.accept.language", acceptLanguage);
   this.accept = conf.get("http.accept", accept);
-  this.ip_header = conf.getBoolean("http.store.ip.address", false);
   // backward-compatible default setting
   this.useHttp11 = conf.getBoolean("http.useHttp11", false);
   this.robots.setConf(conf);
@@ -251,10 +247,6 @@ public abstract class HttpBase implement
 return useHttp11;
   }
   
-  public boolean getIP_Header(){
- return ip_header;
-  }
-  
   private static String getAgentString(String agentName,
String agentVersion,
String agentDesc,
@@ -309,7 +301,6 @@ public abstract class HttpBase implement
   logger.info("http.agent = " + userAgent);
   logger.info("http.accept.language = " + acceptLanguage);
   logger.info("http.accept = " + accept);
-  logger.info("http.store.ip.address = " + ip_header);
 }
   }
   

Modified: 
nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java?rev=1359760&r1=1359759&r2=1359760&view=diff
==
--- 
nutch/trunk/src/plugin/protocol-http

svn commit: r1358658 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/apidocs-2.0/ publish/apidocs-2.0/org/ publish/apidocs-2.0/org/apache/ publish/apidocs-2.0/org/apache/nu

2012-07-07 Thread lewismc
Author: lewismc
Date: Sat Jul  7 22:28:29 2012
New Revision: 1358658

URL: http://svn.apache.org/viewvc?rev=1358658&view=rev
Log:
commit for 2.0 release


[This commit notification would consist of 146 parts, 
which exceeds the limit of 50 ones, so it was shortened to the summary.]


svn commit: r1356855 - /nutch/branches/branch-1.5.1/build.xml

2012-07-03 Thread lewismc
Author: lewismc
Date: Tue Jul  3 18:15:49 2012
New Revision: 1356855

URL: http://svn.apache.org/viewvc?rev=1356855&view=rev
Log:
NUTCH-1415-v2

Modified:
nutch/branches/branch-1.5.1/build.xml

Modified: nutch/branches/branch-1.5.1/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/build.xml?rev=1356855&r1=1356854&r2=1356855&view=diff
==
--- nutch/branches/branch-1.5.1/build.xml (original)
+++ nutch/branches/branch-1.5.1/build.xml Tue Jul  3 18:15:49 2012
@@ -703,11 +703,11 @@
   target name=tar-bin depends=package-bin
 tar compression=gzip longfile=gnu
   destfile=${bin.dist.version.dir}.tar.gz
-  tarfileset dir=${bin.dist.version.dir} mode=664
+  tarfileset dir=${bin.dist.version.dir} mode=664 
prefix=${final.name}
exclude name=bin/* /
 include name=** /
   /tarfileset
-  tarfileset dir=${bin.dist.version.dir} mode=755
+  tarfileset dir=${bin.dist.version.dir} mode=755 
prefix=${final.name}
 include name=bin/* /
   /tarfileset
 /tar
@@ -735,11 +735,11 @@
   target name=zip-bin depends=package-bin
zip compress=true casesensitive=yes 
  destfile=${bin.dist.version.dir}.zip
-   zipfileset dir=${bin.dist.version.dir} filemode=664
+   zipfileset dir=${bin.dist.version.dir} filemode=664 
prefix=${final.name}
exclude name=bin/* /
include name=** /
/zipfileset
-   zipfileset dir=${bin.dist.version.dir} filemode=755
+   zipfileset dir=${src.dist.version.dir} filemode=755 
prefix=${final.name}
include name=bin/* /
/zipfileset
/zip




svn commit: r1356863 - /nutch/tags/release-1.5.1-rc3/

2012-07-03 Thread lewismc
Author: lewismc
Date: Tue Jul  3 18:28:37 2012
New Revision: 1356863

URL: http://svn.apache.org/viewvc?rev=1356863&view=rev
Log:
tag for apache-nutch-1.5.1-rc3

Added:
nutch/tags/release-1.5.1-rc3/
  - copied from r1356862, nutch/branches/branch-1.5.1/



svn commit: r1356338 - /nutch/branches/branch-1.5.1/

2012-07-02 Thread lewismc
Author: lewismc
Date: Mon Jul  2 16:48:49 2012
New Revision: 1356338

URL: http://svn.apache.org/viewvc?rev=1356338&view=rev
Log:
remove old/incorrect branch-1.5.1

Removed:
nutch/branches/branch-1.5.1/



svn commit: r1356343 - in /nutch/branches/branch-1.5.1: CHANGES.txt ivy/ivy.xml

2012-07-02 Thread lewismc
Author: lewismc
Date: Mon Jul  2 16:56:21 2012
New Revision: 1356343

URL: http://svn.apache.org/viewvc?rev=1356343&view=rev
Log:
backport of NUTCH-1398 Upgrade to Hadoop 1.0.3

Modified:
nutch/branches/branch-1.5.1/CHANGES.txt
nutch/branches/branch-1.5.1/ivy/ivy.xml

Modified: nutch/branches/branch-1.5.1/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1356343&r1=1356342&r2=1356343&view=diff
==
--- nutch/branches/branch-1.5.1/CHANGES.txt (original)
+++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jul  2 16:56:21 2012
@@ -1,5 +1,9 @@
 Nutch Change Log
 
+Release 1.5.1 - 07/02/2012
+
+* NUTCH-1398 Upgrade to Hadoop 1.0.3 (jnioche)
+
 Release 1.5 - 04/15/2012
 
 * NUTCH-1208 Don't include KEYS file in bin distribution (jnioche)

Modified: nutch/branches/branch-1.5.1/ivy/ivy.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/ivy/ivy.xml?rev=1356343&r1=1356342&r2=1356343&view=diff
==
--- nutch/branches/branch-1.5.1/ivy/ivy.xml (original)
+++ nutch/branches/branch-1.5.1/ivy/ivy.xml Mon Jul  2 16:56:21 2012
@@ -43,7 +43,7 @@
dependency org=commons-codec name=commons-codec rev=1.3
conf=*-default /
 
-   dependency org=org.apache.hadoop name=hadoop-core 
rev=1.0.0
+   dependency org=org.apache.hadoop name=hadoop-core 
rev=1.0.3
conf=*-default
exclude org=hsqldb name=hsqldb /
exclude org=net.sf.kosmosfs name=kfs /
@@ -67,7 +67,7 @@
 
!--artifacts needed for testing --
dependency org=junit name=junit rev=3.8.1 
conf=*-default /
-   dependency org=org.apache.hadoop name=hadoop-test 
rev=1.0.0
+   dependency org=org.apache.hadoop name=hadoop-test 
rev=1.0.3
conf=test-default /
 
dependency org=org.mortbay.jetty name=jetty rev=6.1.22




svn commit: r1356363 - in /nutch/branches/branch-1.5.1: CHANGES.txt pom.xml src/bin/nutch

2012-07-02 Thread lewismc
Author: lewismc
Date: Mon Jul  2 17:23:10 2012
New Revision: 1356363

URL: http://svn.apache.org/viewvc?rev=1356363&view=rev
Log:
NUTCH-1404 Nutch script fails to find job file in deploy mode

Modified:
nutch/branches/branch-1.5.1/CHANGES.txt
nutch/branches/branch-1.5.1/pom.xml
nutch/branches/branch-1.5.1/src/bin/nutch

Modified: nutch/branches/branch-1.5.1/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1356363&r1=1356362&r2=1356363&view=diff
==
--- nutch/branches/branch-1.5.1/CHANGES.txt (original)
+++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jul  2 17:23:10 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.5.1 - 07/02/2012
 
+* NUTCH-1404 Nutch script fails to find job file in deploy mode (sidabatra, 
jnioche)
+
 * NUTCH-1415 release packages to contain top level folder apache-nutch-x.x 
(snagel via lewismc)
 
 * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)

Modified: nutch/branches/branch-1.5.1/pom.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1356363&r1=1356362&r2=1356363&view=diff
==
--- nutch/branches/branch-1.5.1/pom.xml (original)
+++ nutch/branches/branch-1.5.1/pom.xml Mon Jul  2 17:23:10 2012
@@ -22,7 +22,7 @@
   groupIdorg.apache.nutch/groupId
   artifactIdnutch/artifactId
   packagingjar/packaging
-  version1.5/version
+  version1.5.1/version
   nameApache Nutch/name
   urlhttp://nutch.apache.org/url
   licenses
@@ -149,7 +149,7 @@
 dependency
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-core/artifactId
-version1.0.0/version
+version1.0.3/version
 optionaltrue/optional
 /dependency
 dependency
@@ -203,7 +203,7 @@
 dependency
 groupIdorg.apache.hadoop/groupId
 artifactIdhadoop-test/artifactId
-version1.0.0/version
+version1.0.3/version
 optionaltrue/optional
 /dependency
 dependency

Modified: nutch/branches/branch-1.5.1/src/bin/nutch
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/src/bin/nutch?rev=1356363&r1=1356362&r2=1356363&view=diff
==
--- nutch/branches/branch-1.5.1/src/bin/nutch (original)
+++ nutch/branches/branch-1.5.1/src/bin/nutch Mon Jul  2 17:23:10 2012
@@ -101,9 +101,9 @@ fi
 local=true
 
 # NUTCH_JOB 
-if [ -f ${NUTCH_HOME}/nutch-*.job ]; then
+if [ -f ${NUTCH_HOME}/*nutch*.job ]; then
 local=false
-  for f in $NUTCH_HOME/nutch-*.job; do
+  for f in $NUTCH_HOME/*nutch*.job; do
 NUTCH_JOB=$f;
   done
 fi




svn commit: r1353619 - /nutch/branches/branch-1.5.1/pom.xml

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 15:56:23 2012
New Revision: 1353619

URL: http://svn.apache.org/viewvc?rev=1353619&view=rev
Log:
commit to sync pom.xml with Ivy deps

Modified:
nutch/branches/branch-1.5.1/pom.xml

Modified: nutch/branches/branch-1.5.1/pom.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/pom.xml?rev=1353619&r1=1353618&r2=1353619&view=diff
==
--- nutch/branches/branch-1.5.1/pom.xml (original)
+++ nutch/branches/branch-1.5.1/pom.xml Mon Jun 25 15:56:23 2012
@@ -15,286 +15,215 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; 
xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
+project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance;
+xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
 
-   modelVersion4.0.0/modelVersion
-   parent
-   groupIdorg.apache/groupId
-   artifactIdapache/artifactId
-   version9/version
-   relativePath /
-   /parent   
-   groupIdorg.apache.nutch/groupId
-   artifactIdnutch/artifactId
-   packagingjar/packaging
-   version1.6-SNAPSHOT/version
-   nameApache Nutch/name
-   urlhttp://nutch.apache.org/url
-   licenses
-   license
-   nameThe Apache Software License, Version 2.0/name
-   
urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
-   distributionrepo/distribution
-   /license
-   /licenses
-   scm
-   
connectionscm:svn:http://svn.apache.org/repos/asf/nutch/trunk//connection
-   
developerConnectionscm:svn:https://svn.apache.org/repos/asf/nutch/trunk//developerConnection
-   urlhttp://svn.apache.org/viewvc/nutch/trunk//url
-   /scm
-   developers
+  modelVersion4.0.0/modelVersion
+  groupIdorg.apache.nutch/groupId
+  artifactIdnutch/artifactId
+  packagingjar/packaging
+  version1.5.1/version
+  nameApache Nutch/name
+  urlhttp://nutch.apache.org/url
+  licenses
+ license
+   nameThe Apache Software License, Version 2.0/name
+   urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
+   distributionrepo/distribution
+ /license
+  /licenses
+  scm
+ urlhttp://svn.apache.org/viewvc/nutch/url
+ connectionhttp://svn.apache.org/viewvc/nutch/connection
+  /scm
+  developers
developer
idab/id
nameAndrzej Bialecki/name
emaila...@apache.org/email
/developer
developer
-   idmattmann/id
-   nameChris A. Mattmann/name
-   emailmattm...@apache.org/email
-   /developer
-   developer
-   idkubes/id
-   nameDennis Kubes/name
-   emailku...@apache.org/email
-   /developer
-   developer
+idalexis/id
+nameAlexis Detlegrode/name
+emailale...@apache.org/email
+/developer
+developer
iddogacan/id
-   nameDogacan Güney/name
+   nameDogacan Güney/name
emaildoga...@apache.org/email
/developer
developer
+idferdy/id
+nameFerdy Galema/name
+emailfe...@apache.org/email
+/developer
+developer
idjnioche/id
nameJulien Nioche/name
emailjnio...@apache.org/email
/developer
developer
-   idsiren/id
-   nameSami Siren/name
-   emailsi...@apache.org/email
+   idkubes/id
+   nameDennis Kubes/name
+   emailku...@apache.org/email
/developer
developer
-   idmarkus/id
-   nameMarkus Jelsma/name
-   emailmar...@apache.org/email
-   /developer
+idlewismc/id
+nameLewis John McGibbney/name
+emaillewi...@apache.org/email
+/developer
+   developer
+idmarkus/id
+nameMarkus Jelsma/name
+emailmar...@apache.org/email
+/developer

svn commit: r1353615 - in /nutch/branches/branch-1.5.1: CHANGES.txt conf/nutch-default.xml conf/schema.xml default.properties

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 15:52:52 2012
New Revision: 1353615

URL: http://svn.apache.org/viewvc?rev=1353615&view=rev
Log:
commit to set up RC

Modified:
nutch/branches/branch-1.5.1/CHANGES.txt
nutch/branches/branch-1.5.1/conf/nutch-default.xml
nutch/branches/branch-1.5.1/conf/schema.xml
nutch/branches/branch-1.5.1/default.properties

Modified: nutch/branches/branch-1.5.1/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/CHANGES.txt?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/CHANGES.txt (original)
+++ nutch/branches/branch-1.5.1/CHANGES.txt Mon Jun 25 15:52:52 2012
@@ -1,6 +1,6 @@
 Nutch Change Log
 
-(trunk) Current Development:
+Release 1.5.1 - 25/06/2012 - ddmm
 
 * NUTCH-1400 Remove developer -core option for bin/nutch (jnioche)
 

Modified: nutch/branches/branch-1.5.1/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/nutch-default.xml?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.5.1/conf/nutch-default.xml Mon Jun 25 15:52:52 2012
@@ -123,7 +123,7 @@
 
 property
   namehttp.agent.version/name
-  valueNutch-1.6-SNAPSHOT/value
+  valueNutch-1.5.1/value
   descriptionA version string to advertise in the User-Agent 
header./description
 /property

Modified: nutch/branches/branch-1.5.1/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/conf/schema.xml?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/conf/schema.xml (original)
+++ nutch/branches/branch-1.5.1/conf/schema.xml Mon Jun 25 15:52:52 2012
@@ -28,7 +28,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=1.6
+schema name=nutch version=1.5.1
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 

Modified: nutch/branches/branch-1.5.1/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5.1/default.properties?rev=1353615&r1=1353614&r2=1353615&view=diff
==
--- nutch/branches/branch-1.5.1/default.properties (original)
+++ nutch/branches/branch-1.5.1/default.properties Mon Jun 25 15:52:52 2012
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 name=apache-nutch
-version=1.5.1-SNAPSHOT
+version=1.5.1
 final.name=${name}-${version}
 year=2012
 




svn commit: r1353638 - /nutch/tags/release-2.0rc3/

2012-06-25 Thread lewismc
Author: lewismc
Date: Mon Jun 25 16:24:14 2012
New Revision: 1353638

URL: http://svn.apache.org/viewvc?rev=1353638&view=rev
Log:
tagging Nutch 2.0 RC3

Added:
nutch/tags/release-2.0rc3/
  - copied from r1353637, nutch/branches/nutchgora/



svn commit: r1350600 - /nutch/tags/release-2.0rc2/

2012-06-15 Thread lewismc
Author: lewismc
Date: Fri Jun 15 12:32:13 2012
New Revision: 1350600

URL: http://svn.apache.org/viewvc?rev=1350600&view=rev
Log:
Nutch 2.0 RC2.

Added:
nutch/tags/release-2.0rc2/
  - copied from r1350599, nutch/branches/nutchgora/



svn commit: r1348070 - in /nutch/trunk: conf/schema.xml default.properties

2012-06-08 Thread lewismc
Author: lewismc
Date: Fri Jun  8 13:47:20 2012
New Revision: 1348070

URL: http://svn.apache.org/viewvc?rev=1348070&view=rev
Log:
trivial commit to add license header and update schema number

Modified:
nutch/trunk/conf/schema.xml
nutch/trunk/default.properties

Modified: nutch/trunk/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/schema.xml?rev=1348070&r1=1348069&r2=1348070&view=diff
==
--- nutch/trunk/conf/schema.xml (original)
+++ nutch/trunk/conf/schema.xml Fri Jun  8 13:47:20 2012
@@ -28,7 +28,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=1.4
+schema name=nutch version=1.6
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1348070&r1=1348069&r2=1348070&view=diff
==
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Fri Jun  8 13:47:20 2012
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 name=apache-nutch
 version=1.6-SNAPSHOT
 final.name=${name}-${version}




svn commit: r1348074 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/schema.xml

2012-06-08 Thread lewismc
Author: lewismc
Date: Fri Jun  8 13:56:20 2012
New Revision: 1348074

URL: http://svn.apache.org/viewvc?rev=1348074&view=rev
Log:
trivial commit prior to RC#1

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/conf/schema.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1348074&r1=1348073&r2=1348074&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Fri Jun  8 13:56:20 2012
@@ -1,6 +1,7 @@
 Nutch Change Log
 
-Release 2.1 (22/02/2012)
+Release 2.0 (08/06/2012) ddmmyyy
+Full Jira report - 
https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=10680&version=12314893
 
 * NUTCH-1379 NPE when reprUrl is null in ParseUtil (ferdy)
 

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1348074&r1=1348073&r2=1348074&view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Fri Jun  8 13:56:20 2012
@@ -608,7 +608,6 @@
 mkdir dir=${dist.dir}/
 mkdir dir=${src.dist.version.dir}/
 mkdir dir=${src.dist.version.dir}/lib/
-mkdir dir=${src.dist.version.dir}/runtime/
 mkdir dir=${src.dist.version.dir}/docs/
 mkdir dir=${src.dist.version.dir}/docs/api/
 mkdir dir=${src.dist.version.dir}/ivy/
@@ -616,15 +615,6 @@
 copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false
   fileset dir=lib/
 /copy
-   
-copy todir=${src.dist.version.dir}/runtime
-  fileset dir=runtime/
-/copy
-
-chmod perm=ugo+x type=file
-fileset dir=${src.dist.version.dir}/runtime/deploy/bin/
-fileset dir=${src.dist.version.dir}/runtime/local/bin/
-/chmod
 
 copy todir=${src.dist.version.dir}/conf
   fileset dir=${conf.dir} excludes=**/*.template/
@@ -704,6 +694,7 @@
   destfile=${src.dist.version.dir}.tar.gz 
basedir=${src.dist.version.dir}
   tarfileset dir=${dist.dir} mode=664
exclude name=${src.dist.version.dir}/bin/* /
+   exclude name=${src.dist.version.dir}/runtime/* /
 include name=${src.dist.version.dir}/** /
   /tarfileset
   tarfileset dir=${dist.dir} mode=755
@@ -736,6 +727,7 @@
destfile=${src.dist.version.dir}.zip basedir=${src.dist.version.dir}
zipfileset dir=${dist.dir} filemode=664
exclude name=${src.dist.version.dir}/bin/* /
+   exclude name=${src.dist.version.dir}/runtime/* /
include name=${src.dist.version.dir}/** /
/zipfileset
zipfileset dir=${dist.dir} filemode=755

Modified: nutch/branches/nutchgora/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/schema.xml?rev=1348074&r1=1348073&r2=1348074&view=diff
==
--- nutch/branches/nutchgora/conf/schema.xml (original)
+++ nutch/branches/nutchgora/conf/schema.xml Fri Jun  8 13:56:20 2012
@@ -27,7 +27,7 @@
 example/solr/conf/schema.xml?view=markup
 for more info.
 --
-schema name=nutch version=1.4
+schema name=nutch version=2.0
 types
 fieldType name=string class=solr.StrField sortMissingLast=true
 omitNorms=true/ 




svn commit: r1348087 - /nutch/tags/release-2.0/nutchgora/

2012-06-08 Thread lewismc
Author: lewismc
Date: Fri Jun  8 14:23:13 2012
New Revision: 1348087

URL: http://svn.apache.org/viewvc?rev=1348087&view=rev
Log:
Nutch 2.0 release.

Added:
nutch/tags/release-2.0/nutchgora/
  - copied from r1348086, nutch/branches/nutchgora/



svn commit: r1348095 - /nutch/branches/nutchgora/KEYS

2012-06-08 Thread lewismc
Author: lewismc
Date: Fri Jun  8 14:42:59 2012
New Revision: 1348095

URL: http://svn.apache.org/viewvc?rev=1348095&view=rev
Log:
trivial commit to add my details to KEYS file

Modified:
nutch/branches/nutchgora/KEYS

Modified: nutch/branches/nutchgora/KEYS
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/KEYS?rev=1348095&r1=1348094&r2=1348095&view=diff
==
--- nutch/branches/nutchgora/KEYS (original)
+++ nutch/branches/nutchgora/KEYS Fri Jun  8 14:42:59 2012
@@ -240,3 +240,62 @@ vIxV0MEa+WSBaplFlj0ACfs2Hdh6skErvMAzXHBC
 oPLz8+OED4Ec4Pjuuic3HX2Ff0myAKDd74+vKoAC78+CStjf1pSSmy4y4w==
 =mbQZ
 -----END PGP PUBLIC KEY BLOCK-----
+
+pub   4096R/C601BCA7 2012-04-17
+uid  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+sig 3C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+sub   4096R/FCD9FF28 2012-04-17
+sig  C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v1.4.10 (GNU/Linux)
+
+mQINBE+NSUkBEAC3Qu1mT3x0swS4zXta2NnJtrepOqpsU292U+hzkbjdG8W+W2WA
+3oRdd5f/iKkkE1Z3q53qD++PazLQf+g+378Ce+CP4bwhZuz/CgSa8EO2rIXadVUG
+M+XBAiSlLWyQhwW8qbipGQvpT1PXp8mjwXlWzt+0+4F9ybepYxStUPaybIFfSn+f
+M8YzYLgfKSsHMgPeK6TGRJAqC+u7t+XMYWmfVS9TpoOyfZ3tsn3YmeH4JiqF49/0
+XzkqgM7FW52By64Nm6xCOfqXCaMmVV5JRuZFhLB4VmWlH/Mikv5Tu99gsAdGwFIb
+MhMWtWZ/azKarTkQiZjDka09Mxc6skXCBBbxz9lstE4X50d5PMqOgVBtFstmL64h
+Km2dSIdVEUyjM9y1HBRZO1+ooNs5xja1DnSAuytstrRnt5Vdnuk/RS8t2qfcm2jP
+NWrZNOix9U+pT7qUQ1wbK/ew+qWbNFlvp9i3XyZdfPpyEmYD4CsBvkVbiH+FULwS
+F4OJQlJoDJ1vHnSPMNSGtiNRTLSQ2+E6huqktyAY+rcTamCEkCdoZ5NTyMbEgqZ1
+P4fr+h+EpV0h/ACzjhE4sq6MK6KZFv3a3Erlk4oC93BVJpcYyZyQneKQSapbAv9u
+oYCTLHyCrBdXItnFEHhy1zN0DvbWoGtsxDvAVjY3D9YP32Yu3WvxeW25bQARAQAB
+tDxMZXdpcyBKb2huIE1jR2liYm5leSAoQ09ERSBTSUdOSU5HIEtFWSkgPGxld2lz
+bWNAYXBhY2hlLm9yZz6JAjgEEwECACIFAk+NSUkCGwMGCwkIBwMCBhUIAgkKCwQW
+AgMBAh4BAheAAAoJEPReeXDGAbynxnQP/1s1e1eDUAvZv1k+OVhG+nDhqtBtmFV6
+sx67atpzZCj6ckKXphkiWAFmYsAH7pujHgASuAIoMY7MLjaRuG2MiEdWINYH5LVB
+xmZ3M9f1+YBuTSs/0KKBfqVBYm5vbEC+vBkjez54DOJ7OfRQllra98FR5GxEoYhh
+bIQDtUtYrLjzd9kbUH5J+cTgSJ08ciIxanscvFRE7+X2sQTopor6f+o7iea7k6KM
+b5FJ9mi4Q3RQbkorncyyDp4O7rBsuaGeD2oORdSM1zT5ql3glq7cYUI8havHY696
+jWYLOc951l6fDofGi4ZirX0+Mlxj+d2BNY54rx9dl6pZOmahvD4pveq/vbzwOH9E
+vb1uTfRIYLaNW++1nXzPBZ5nzsemDb3K8yVYXnCDrqmzOZMJu5AinvUUusTrRhT/
+4oy2AO1YEIjgwHFzYvv7C7/wYSQC5AxvO0plvyH/kMK/vQk3H7I13isHdyZhEjrR
+e+ciNzPWh4R6W8zVbe29MljItmINWniJ/CnYi9/r7ZtkQUBUCmHQZcsCm2DflA83
+ueLozFY3NH2eQ4q9dY8QIJDOpsX1SrP8DUOpuai3PvEiE8stHxGpamFq2DgnS81x
+/e/kSbIBD6QGgP1S7Zrkdz4jriCCY4mv9mYMu9De/sObYcpGdg6rE49lz9NWeE8w
+Wtt1oexR6DhpuQINBE+NSUkBEADOm92hnYd9ZNSmaVSUegmo0Rx9CMIzRZzHXPXT
+SxxMnJScWDKeTWa7U1A0peiNIUKKlgFcnUY176o4wk8y2sNgyYkYO6wQlzmoyQIh
+Ft0fqE3LMKBJcW2JONWFVrFZpRPTFvRWnDOSur8IQq3rJkyiqfT5y0E7PAdd8aa3
+l7anp8gfKCf9iIYtgfNsKNphngkwOLNDVsED7G/VRfAezjDKyf0M9HSL0fjQ5YDe
+L5MMmgduvYKBtWISM5tqJAunkMpGeWJ6/khJZT+bLK8iLM2073W5uSlNs6oO2AM8
+lDvfmnsFC4178mbU9nJNi+KAXzwZXH4xcqywRKZhuWI5BVPGi50HJ/RIZtDyrkrK
+W7NACtmniuFzSy9PxrM2iappUsfY8b7uZBzGoo1BzT7F7VM7sSte+X+zs8TZ0dam
+6TbuGMuv5rPQGAwu2JWUNOeBzXvfkg3gzk4qZrBdHtUrQjx33c1NBZddLcoSqzgC
+ph2cz4NG4Fs/Mi8SXoKBwJGVeWE+ZCBma8vFP/zctb/XroIaFSE5rAwHydwCB4gu
+VB3rNuLCoiiB50lPzAPFjjFxOuZeTZfl4bp1XRE1KKYi+n974At4HDd5g0Az8w37
+5/9G+pARCzjytvIHJTYQDsG0hfnj2Vfb5WWYF6LMib0ZGf739Yp7L602/yE9QAKm
+bifPCQARAQABiQIfBBgBAgAJBQJPjUlJAhsMAAoJEPReeXDGAbynzc4P/AomVPfY
+bY61TE+QSKAJl8/dyyw+LSddTPFTleVBFHlq1tnQmLWxoNq5t1CRXUJOv3q6haPE
+PLKR5pXXtNzAGVP74Jipa5r8FQjBG0j+XriiHmr861xyno0uPG23c0LSRqHrcLi6
+tgN2Q2ihu1Tjaql+ukzPI6u2v97FD0qhJWKvFFo64p7HTNUXHJLQ9N/m1Pien7Nm
+KFLRI0Pu0CW95I1w2gAAlS++lIxT3/ANfw6SpK9+lNBaan1g0xM5/P54MIQvZgCQ
+gdIcWdAOmXjTyMryconkeNRWpkYjXG4hZj9crP48j3lZPlUYol4pdkQ1CtSq1emv
+VDGoUrn5bRWoybOFfx3joOLpUqJA5PDjeN7YMpJNWc3O/lz+S+sW9WZY7vwbK+Mn
+E/l4Bz2k9fQDsxm2rPzM2aS/qaBo9v7vj+NE85B2/NE9cXo0WoC8u5o+KEQY6urV
+ANW/A0k94wmfoBMbmzNZ5Y5zJ9vceW9d4FE2FXaynRke2awYHBZE2Ty3MSxCQAvp
+MREQKzxB1XcR+Frj0nMKMmdEmM55OmIgAqAct1OuGDbOATJMcmVuwHqTZIdynzqh
+NPgXHx4ASqesjF/9GUrAQfOmXqHdOF6xOb7YYGssl1kgvOQRVJhkWtmTckyk+xu9
+U3Wt+q9F6O+RmemV6a6mrpog+Aq+BkIMWCJ8
+=xHbT
+-----END PGP PUBLIC KEY BLOCK-----




svn commit: r1344886 - /nutch/branches/branch-1.5/build.xml

2012-05-31 Thread lewismc
Author: lewismc
Date: Thu May 31 20:08:26 2012
New Revision: 1344886

URL: http://svn.apache.org/viewvc?rev=1344886&view=rev
Log:
commit to finalise ant tar-src and ant zip-src targets for RC4

Modified:
nutch/branches/branch-1.5/build.xml

Modified: nutch/branches/branch-1.5/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1344886&r1=1344885&r2=1344886&view=diff
==
--- nutch/branches/branch-1.5/build.xml (original)
+++ nutch/branches/branch-1.5/build.xml Thu May 31 20:08:26 2012
@@ -603,7 +603,6 @@
 mkdir dir=${dist.dir}/
 mkdir dir=${src.dist.version.dir}/
 mkdir dir=${src.dist.version.dir}/lib/
-mkdir dir=${src.dist.version.dir}/runtime/
 mkdir dir=${src.dist.version.dir}/docs/
 mkdir dir=${src.dist.version.dir}/docs/api/
 mkdir dir=${src.dist.version.dir}/ivy/
@@ -612,15 +611,6 @@
   fileset dir=lib/
 /copy

-copy todir=${src.dist.version.dir}/runtime
-  fileset dir=runtime/
-/copy
-
-chmod perm=ugo+x type=file
-fileset dir=${src.dist.version.dir}/runtime/deploy/bin/
-fileset dir=${src.dist.version.dir}/runtime/local/bin/
-/chmod
-
 copy todir=${src.dist.version.dir}/conf
   fileset dir=${conf.dir} excludes=**/*.template/
 /copy
@@ -699,6 +689,7 @@
   destfile=${src.dist.version.dir}.tar.gz 
basedir=${src.dist.version.dir}
   tarfileset dir=${dist.dir} mode=664
exclude name=${src.dist.version.dir}/bin/* /
+   exclude name=${src.dist.version.dir}/runtime/* /
 include name=${src.dist.version.dir}/** /
   /tarfileset
   tarfileset dir=${dist.dir} mode=755
@@ -731,6 +722,7 @@
  destfile=${src.dist.version.dir}.zip basedir=${src.dist.version.dir}
zipfileset dir=${dist.dir} filemode=664
exclude name=${src.dist.version.dir}/bin/* /
+   exclude name=${src.dist.version.dir}/runtime/* /
include name=${src.dist.version.dir}/** /
/zipfileset
zipfileset dir=${dist.dir} filemode=755




svn commit: r1344451 - in /nutch/branches/branch-1.5: build.xml default.properties

2012-05-30 Thread lewismc
Author: lewismc
Date: Wed May 30 20:28:39 2012
New Revision: 1344451

URL: http://svn.apache.org/viewvc?rev=1344451&view=rev
Log:
commit to fix broken ant targets

Modified:
nutch/branches/branch-1.5/build.xml
nutch/branches/branch-1.5/default.properties

Modified: nutch/branches/branch-1.5/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1344451&r1=1344450&r2=1344451&view=diff
==
--- nutch/branches/branch-1.5/build.xml (original)
+++ nutch/branches/branch-1.5/build.xml Wed May 30 20:28:39 2012
@@ -420,7 +420,7 @@
 ant dir=src/plugin target=test inheritAll=false/
   /target
 
-  target name=nightly depends=test, tar
+  target name=nightly depends=test, tar-src, zip-src
   /target
 
   !-- == --
@@ -599,123 +599,158 @@
   !-- == --
   !----
   !-- == --
-  target name=package depends=runtime, javadoc
-mkdir dir=${dist.version.dir}/
-mkdir dir=${dist.version.dir}/lib/
-mkdir dir=${dist.version.dir}/runtime/
-mkdir dir=${dist.version.dir}/docs/
-mkdir dir=${dist.version.dir}/docs/api/
-mkdir dir=${dist.version.dir}/ivy/
+  target name=package-src depends=runtime, javadoc
+mkdir dir=${dist.dir}/
+mkdir dir=${src.dist.version.dir}/
+mkdir dir=${src.dist.version.dir}/lib/
+mkdir dir=${src.dist.version.dir}/runtime/
+mkdir dir=${src.dist.version.dir}/docs/
+mkdir dir=${src.dist.version.dir}/docs/api/
+mkdir dir=${src.dist.version.dir}/ivy/
 
-copy todir=${dist.version.dir}/lib includeEmptyDirs=false
+copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false
   fileset dir=lib/
 /copy

-copy todir=${dist.version.dir}/runtime
+copy todir=${src.dist.version.dir}/runtime
   fileset dir=runtime/
 /copy
 
 chmod perm=ugo+x type=file
-fileset dir=${dist.version.dir}/runtime/deploy/bin/
-fileset dir=${dist.version.dir}/runtime/local/bin/
+fileset dir=${src.dist.version.dir}/runtime/deploy/bin/
+fileset dir=${src.dist.version.dir}/runtime/local/bin/
 /chmod
 
-copy todir=${dist.version.dir}/conf
+copy todir=${src.dist.version.dir}/conf
   fileset dir=${conf.dir} excludes=**/*.template/
 /copy
 
-copy todir=${dist.version.dir}/docs/api
+copy todir=${src.dist.version.dir}/docs/api
   fileset dir=${build.javadoc}/
 /copy
 
-copy todir=${dist.version.dir}
+copy todir=${src.dist.version.dir}
   fileset dir=.
 include name=*.txt /
 !--include name=KEYS /--
   /fileset
 /copy
 
-copy todir=${dist.version.dir}/src includeEmptyDirs=true
+copy todir=${src.dist.version.dir}/src includeEmptyDirs=true
   fileset dir=src/
 /copy
 
-copy todir=${dist.version.dir}/ivy includeEmptyDirs=true
+copy todir=${src.dist.version.dir}/ivy includeEmptyDirs=true
   fileset dir=ivy/
 /copy
 
-copy todir=${dist.version.dir}/ file=build.xml/
-copy todir=${dist.version.dir}/ file=default.properties/
+copy todir=${src.dist.version.dir}/ file=build.xml/
+copy todir=${src.dist.version.dir}/ file=default.properties/
 
   /target
 
  target name=package-bin depends=runtime, javadoc
-mkdir dir=${dist.version.dir}-bin/
-mkdir dir=${dist.version.dir}-bin/lib/
-mkdir dir=${dist.version.dir}-bin/bin/
-mkdir dir=${dist.version.dir}-bin/conf/
-mkdir dir=${dist.version.dir}-bin/docs/
-mkdir dir=${dist.version.dir}-bin/docs/api/
-mkdir dir=${dist.version.dir}-bin/plugins/
+mkdir dir=${dist.dir}/
+mkdir dir=${bin.dist.version.dir}/
+mkdir dir=${bin.dist.version.dir}/lib/
+mkdir dir=${bin.dist.version.dir}/bin/
+mkdir dir=${bin.dist.version.dir}/conf/
+mkdir dir=${bin.dist.version.dir}/docs/
+mkdir dir=${bin.dist.version.dir}/docs/api/
+mkdir dir=${bin.dist.version.dir}/plugins/
 
-copy todir=${dist.version.dir}-bin/lib includeEmptyDirs=false
+copy todir=${bin.dist.version.dir}/lib includeEmptyDirs=false
   fileset dir=runtime/local/lib/
 /copy

-copy todir=${dist.version.dir}-bin/bin
+copy todir=${bin.dist.version.dir}/bin
   fileset dir=runtime/local/bin/
 /copy
 
 chmod perm=ugo+x type=file
-fileset dir=${dist.version.dir}-bin/bin/
+fileset dir=${bin.dist.version.dir}/bin/
 /chmod
 
-copy todir=${dist.version.dir}-bin/conf
+copy todir=${bin.dist.version.dir}/conf
   fileset dir=runtime/local/conf excludes=**/*.template/
 /copy
 
-copy todir=${dist.version.dir}-bin/docs/api
+copy todir=${bin.dist.version.dir}/docs/api
   fileset dir=${build.javadoc}/
 /copy
 
-copy todir=${dist.version.dir}-bin

svn commit: r1344452 - /nutch/tags/release-1.5-rc3/

2012-05-30 Thread lewismc
Author: lewismc
Date: Wed May 30 20:31:05 2012
New Revision: 1344452

URL: http://svn.apache.org/viewvc?rev=1344452&view=rev
Log:
Nutch release-1.5RC3

Added:
nutch/tags/release-1.5-rc3/
  - copied from r1344451, nutch/branches/branch-1.5/



svn commit: r1344477 - in /nutch/trunk: build.xml conf/nutch-default.xml default.properties

2012-05-30 Thread lewismc
Author: lewismc
Date: Wed May 30 21:42:49 2012
New Revision: 1344477

URL: http://svn.apache.org/viewvc?rev=1344477&view=rev
Log:
commit to backport release1.5 changes to trunk

Modified:
nutch/trunk/build.xml
nutch/trunk/conf/nutch-default.xml
nutch/trunk/default.properties

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1344477&r1=1344476&r2=1344477&view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Wed May 30 21:42:49 2012
@@ -422,7 +422,7 @@
 ant dir=src/plugin target=test inheritAll=false/
   /target
 
-  target name=nightly depends=test, tar
+  target name=nightly depends=test, tar-src, zip-src
   /target
 
   !-- == --
@@ -596,96 +596,98 @@
includes=nutch-default.xml style=conf/nutch-conf.xsl/
   /target
 
-  !-- == --
+!-- == --
   !-- D I S T R I B U T I O N--
   !-- == --
   !----
   !-- == --
   target name=package-src depends=runtime, javadoc
-mkdir dir=${dist.version.dir}-src/
-mkdir dir=${dist.version.dir}-src/lib/
-mkdir dir=${dist.version.dir}-src/runtime/
-mkdir dir=${dist.version.dir}-src/docs/
-mkdir dir=${dist.version.dir}-src/docs/api/
-mkdir dir=${dist.version.dir}-src/ivy/
+mkdir dir=${dist.dir}/
+mkdir dir=${src.dist.version.dir}/
+mkdir dir=${src.dist.version.dir}/lib/
+mkdir dir=${src.dist.version.dir}/runtime/
+mkdir dir=${src.dist.version.dir}/docs/
+mkdir dir=${src.dist.version.dir}/docs/api/
+mkdir dir=${src.dist.version.dir}/ivy/
 
-copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false
+copy todir=${src.dist.version.dir}/lib includeEmptyDirs=false
   fileset dir=lib/
 /copy

-copy todir=${dist.version.dir}-src/runtime
+copy todir=${src.dist.version.dir}/runtime
   fileset dir=runtime/
 /copy
 
 chmod perm=ugo+x type=file
-fileset dir=${dist.version.dir}-src/runtime/deploy/bin/
-fileset dir=${dist.version.dir}-src/runtime/local/bin/
+fileset dir=${src.dist.version.dir}/runtime/deploy/bin/
+fileset dir=${src.dist.version.dir}/runtime/local/bin/
 /chmod
 
-copy todir=${dist.version.dir}-src/conf
+copy todir=${src.dist.version.dir}/conf
   fileset dir=${conf.dir} excludes=**/*.template/
 /copy
 
-copy todir=${dist.version.dir}-src/docs/api
+copy todir=${src.dist.version.dir}/docs/api
   fileset dir=${build.javadoc}/
 /copy
 
-copy todir=${dist.version.dir}-src
+copy todir=${src.dist.version.dir}
   fileset dir=.
 include name=*.txt /
 !--include name=KEYS /--
   /fileset
 /copy
 
-copy todir=${dist.version.dir}-src/src includeEmptyDirs=true
+copy todir=${src.dist.version.dir}/src includeEmptyDirs=true
   fileset dir=src/
 /copy
 
-copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true
+copy todir=${src.dist.version.dir}/ivy includeEmptyDirs=true
   fileset dir=ivy/
 /copy
 
-copy todir=${dist.version.dir}-src/ file=build.xml/
-copy todir=${dist.version.dir}-src/ file=default.properties/
+copy todir=${src.dist.version.dir}/ file=build.xml/
+copy todir=${src.dist.version.dir}/ file=default.properties/
 
   /target
 
  target name=package-bin depends=runtime, javadoc
-mkdir dir=${dist.version.dir}-bin/
-mkdir dir=${dist.version.dir}-bin/lib/
-mkdir dir=${dist.version.dir}-bin/bin/
-mkdir dir=${dist.version.dir}-bin/conf/
-mkdir dir=${dist.version.dir}-bin/docs/
-mkdir dir=${dist.version.dir}-bin/docs/api/
-mkdir dir=${dist.version.dir}-bin/plugins/
+mkdir dir=${dist.dir}/
+mkdir dir=${bin.dist.version.dir}/
+mkdir dir=${bin.dist.version.dir}/lib/
+mkdir dir=${bin.dist.version.dir}/bin/
+mkdir dir=${bin.dist.version.dir}/conf/
+mkdir dir=${bin.dist.version.dir}/docs/
+mkdir dir=${bin.dist.version.dir}/docs/api/
+mkdir dir=${bin.dist.version.dir}/plugins/
 
-copy todir=${dist.version.dir}-bin/lib includeEmptyDirs=false
+copy todir=${bin.dist.version.dir}/lib includeEmptyDirs=false
   fileset dir=runtime/local/lib/
 /copy

-copy todir=${dist.version.dir}-bin/bin
+copy todir=${bin.dist.version.dir}/bin
   fileset dir=runtime/local/bin/
 /copy
 
 chmod perm=ugo+x type=file
-fileset dir=${dist.version.dir}-bin/bin/
+fileset dir=${bin.dist.version.dir}/bin/
 /chmod
 
-copy todir=${dist.version.dir}-bin/conf
+copy

svn commit: r1341425 - in /nutch/branches/nutchgora: ./ ivy/ src/java/org/apache/nutch/storage/ src/plugin/creativecommons/src/web/ src/plugin/protocol-httpclient/src/test/conf/

2012-05-22 Thread lewismc
Author: lewismc
Date: Tue May 22 11:59:27 2012
New Revision: 1341425

URL: http://svn.apache.org/viewvc?rev=1341425&view=rev
Log:
commit to bring code up to scratch with trunk w.r.t preparation for the RC

Modified:
nutch/branches/nutchgora/NOTICE.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/default.properties
nutch/branches/nutchgora/ivy/mvn.template
nutch/branches/nutchgora/src/java/org/apache/nutch/storage/Host.java
nutch/branches/nutchgora/src/plugin/creativecommons/src/web/search.jsp
nutch/branches/nutchgora/src/plugin/creativecommons/src/web/web.xml

nutch/branches/nutchgora/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml

nutch/branches/nutchgora/src/plugin/protocol-httpclient/src/test/conf/nutch-site-test.xml

Modified: nutch/branches/nutchgora/NOTICE.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/NOTICE.txt?rev=1341425&r1=1341424&r2=1341425&view=diff
==
--- nutch/branches/nutchgora/NOTICE.txt (original)
+++ nutch/branches/nutchgora/NOTICE.txt Tue May 22 11:59:27 2012
@@ -1,5 +1,5 @@
 Apache Nutch
-Copyright 2009 The Apache Software Foundation
+Copyright 2012 The Apache Software Foundation
 
 This product includes software developed by The Apache Software
 Foundation (http://www.apache.org/).

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1341425r1=1341424r2=1341425view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Tue May 22 11:59:27 2012
@@ -144,27 +144,40 @@
arg value=${javadoc.proxy.host} /
arg value=${javadoc.proxy.port} /
 
-   packageset dir=${src.dir} /
-   packageset dir=${plugins.dir}/lib-http/src/java /
-   packageset dir=${plugins.dir}/lib-regex-filter/src/java /
-   packageset dir=${plugins.dir}/microformats-reltag/src/java /
-   packageset dir=${plugins.dir}/protocol-file/src/java /
-   packageset dir=${plugins.dir}/protocol-ftp/src/java /
-   packageset dir=${plugins.dir}/protocol-http/src/java /
-   packageset dir=${plugins.dir}/protocol-httpclient/src/java /
-   packageset dir=${plugins.dir}/parse-tika/src/java /
-   packageset dir=${plugins.dir}/parse-ext/src/java /
-   packageset dir=${plugins.dir}/parse-js/src/java /
-   packageset dir=${plugins.dir}/parse-swf/src/java /
-   packageset dir=${plugins.dir}/parse-zip/src/java /
-   packageset dir=${plugins.dir}/index-basic/src/java /
-   packageset dir=${plugins.dir}/index-more/src/java /
-   packageset dir=${plugins.dir}/scoring-opic/src/java /
-   packageset dir=${plugins.dir}/urlfilter-automaton/src/java /
-   packageset dir=${plugins.dir}/urlfilter-regex/src/java /
-   packageset dir=${plugins.dir}/urlfilter-prefix/src/java /
-   packageset dir=${plugins.dir}/creativecommons/src/java /
-   packageset dir=${plugins.dir}/language-identifier/src/java /
+   packageset dir=${src.dir}/
+  packageset dir=${plugins.dir}/creativecommons/src/java/
+  packageset dir=${plugins.dir}/feed/src/java/
+  packageset dir=${plugins.dir}/index-anchor/src/java/
+  packageset dir=${plugins.dir}/index-basic/src/java/
+  packageset dir=${plugins.dir}/index-more/src/java/
+  packageset dir=${plugins.dir}/language-identifier/src/java/
+  packageset dir=${plugins.dir}/lib-http/src/java/
+  packageset dir=${plugins.dir}/lib-regex-filter/src/java/
+  packageset dir=${plugins.dir}/microformats-reltag/src/java/
+  packageset dir=${plugins.dir}/parse-ext/src/java/
+  packageset dir=${plugins.dir}/parse-html/src/java/
+  packageset dir=${plugins.dir}/parse-js/src/java/
+  packageset dir=${plugins.dir}/parse-swf/src/java/
+  packageset dir=${plugins.dir}/parse-tika/src/java/
+  packageset dir=${plugins.dir}/parse-zip/src/java/
+  packageset dir=${plugins.dir}/protocol-file/src/java/
+  packageset dir=${plugins.dir}/protocol-ftp/src/java/
+  packageset dir=${plugins.dir}/protocol-http/src/java/
+  packageset dir=${plugins.dir}/protocol-httpclient/src/java/
+  packageset dir=${plugins.dir}/protocol-sftp/src/java/
+  packageset dir=${plugins.dir}/scoring-link/src/java/
+  packageset dir=${plugins.dir}/scoring-opic/src/java/
+  packageset dir=${plugins.dir}/subcollection/src/java/
+  packageset dir=${plugins.dir}/tld/src/java/
+  packageset dir=${plugins.dir}/urlfilter-automaton/src/java/
+  packageset dir=${plugins.dir}/urlfilter-domain/src/java/
+  packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
+  packageset dir=${plugins.dir}/urlfilter-regex/src/java/
+  packageset dir=${plugins.dir}/urlfilter-suffix/src/java/
+  packageset dir=${plugins.dir}/urlfilter-validator/src/java/
+  packageset dir=${plugins.dir}/urlnormalizer-basic/src/java/
+  packageset dir

svn commit: r1341570 - in /nutch/branches/branch-1.5: KEYS build.xml ivy/mvn.template

2012-05-22 Thread lewismc
Author: lewismc
Date: Tue May 22 17:31:35 2012
New Revision: 1341570

URL: http://svn.apache.org/viewvc?rev=1341570&view=rev
Log:
final commit before pushing RC2

Modified:
nutch/branches/branch-1.5/KEYS
nutch/branches/branch-1.5/build.xml
nutch/branches/branch-1.5/ivy/mvn.template

Modified: nutch/branches/branch-1.5/KEYS
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5/KEYS?rev=1341570r1=1341569r2=1341570view=diff
==
--- nutch/branches/branch-1.5/KEYS (original)
+++ nutch/branches/branch-1.5/KEYS Tue May 22 17:31:35 2012
@@ -240,3 +240,62 @@ vIxV0MEa+WSBaplFlj0ACfs2Hdh6skErvMAzXHBC
 oPLz8+OED4Ec4Pjuuic3HX2Ff0myAKDd74+vKoAC78+CStjf1pSSmy4y4w==
 =mbQZ
 -END PGP PUBLIC KEY BLOCK-
+
+pub   4096R/C601BCA7 2012-04-17
+uid  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+sig 3C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+sub   4096R/FCD9FF28 2012-04-17
+sig  C601BCA7 2012-04-17  Lewis John McGibbney (CODE SIGNING KEY) 
lewi...@apache.org
+
+-BEGIN PGP PUBLIC KEY BLOCK-
+Version: GnuPG v1.4.10 (GNU/Linux)
+
+mQINBE+NSUkBEAC3Qu1mT3x0swS4zXta2NnJtrepOqpsU292U+hzkbjdG8W+W2WA
+3oRdd5f/iKkkE1Z3q53qD++PazLQf+g+378Ce+CP4bwhZuz/CgSa8EO2rIXadVUG
+M+XBAiSlLWyQhwW8qbipGQvpT1PXp8mjwXlWzt+0+4F9ybepYxStUPaybIFfSn+f
+M8YzYLgfKSsHMgPeK6TGRJAqC+u7t+XMYWmfVS9TpoOyfZ3tsn3YmeH4JiqF49/0
+XzkqgM7FW52By64Nm6xCOfqXCaMmVV5JRuZFhLB4VmWlH/Mikv5Tu99gsAdGwFIb
+MhMWtWZ/azKarTkQiZjDka09Mxc6skXCBBbxz9lstE4X50d5PMqOgVBtFstmL64h
+Km2dSIdVEUyjM9y1HBRZO1+ooNs5xja1DnSAuytstrRnt5Vdnuk/RS8t2qfcm2jP
+NWrZNOix9U+pT7qUQ1wbK/ew+qWbNFlvp9i3XyZdfPpyEmYD4CsBvkVbiH+FULwS
+F4OJQlJoDJ1vHnSPMNSGtiNRTLSQ2+E6huqktyAY+rcTamCEkCdoZ5NTyMbEgqZ1
+P4fr+h+EpV0h/ACzjhE4sq6MK6KZFv3a3Erlk4oC93BVJpcYyZyQneKQSapbAv9u
+oYCTLHyCrBdXItnFEHhy1zN0DvbWoGtsxDvAVjY3D9YP32Yu3WvxeW25bQARAQAB
+tDxMZXdpcyBKb2huIE1jR2liYm5leSAoQ09ERSBTSUdOSU5HIEtFWSkgPGxld2lz
+bWNAYXBhY2hlLm9yZz6JAjgEEwECACIFAk+NSUkCGwMGCwkIBwMCBhUIAgkKCwQW
+AgMBAh4BAheAAAoJEPReeXDGAbynxnQP/1s1e1eDUAvZv1k+OVhG+nDhqtBtmFV6
+sx67atpzZCj6ckKXphkiWAFmYsAH7pujHgASuAIoMY7MLjaRuG2MiEdWINYH5LVB
+xmZ3M9f1+YBuTSs/0KKBfqVBYm5vbEC+vBkjez54DOJ7OfRQllra98FR5GxEoYhh
+bIQDtUtYrLjzd9kbUH5J+cTgSJ08ciIxanscvFRE7+X2sQTopor6f+o7iea7k6KM
+b5FJ9mi4Q3RQbkorncyyDp4O7rBsuaGeD2oORdSM1zT5ql3glq7cYUI8havHY696
+jWYLOc951l6fDofGi4ZirX0+Mlxj+d2BNY54rx9dl6pZOmahvD4pveq/vbzwOH9E
+vb1uTfRIYLaNW++1nXzPBZ5nzsemDb3K8yVYXnCDrqmzOZMJu5AinvUUusTrRhT/
+4oy2AO1YEIjgwHFzYvv7C7/wYSQC5AxvO0plvyH/kMK/vQk3H7I13isHdyZhEjrR
+e+ciNzPWh4R6W8zVbe29MljItmINWniJ/CnYi9/r7ZtkQUBUCmHQZcsCm2DflA83
+ueLozFY3NH2eQ4q9dY8QIJDOpsX1SrP8DUOpuai3PvEiE8stHxGpamFq2DgnS81x
+/e/kSbIBD6QGgP1S7Zrkdz4jriCCY4mv9mYMu9De/sObYcpGdg6rE49lz9NWeE8w
+Wtt1oexR6DhpuQINBE+NSUkBEADOm92hnYd9ZNSmaVSUegmo0Rx9CMIzRZzHXPXT
+SxxMnJScWDKeTWa7U1A0peiNIUKKlgFcnUY176o4wk8y2sNgyYkYO6wQlzmoyQIh
+Ft0fqE3LMKBJcW2JONWFVrFZpRPTFvRWnDOSur8IQq3rJkyiqfT5y0E7PAdd8aa3
+l7anp8gfKCf9iIYtgfNsKNphngkwOLNDVsED7G/VRfAezjDKyf0M9HSL0fjQ5YDe
+L5MMmgduvYKBtWISM5tqJAunkMpGeWJ6/khJZT+bLK8iLM2073W5uSlNs6oO2AM8
+lDvfmnsFC4178mbU9nJNi+KAXzwZXH4xcqywRKZhuWI5BVPGi50HJ/RIZtDyrkrK
+W7NACtmniuFzSy9PxrM2iappUsfY8b7uZBzGoo1BzT7F7VM7sSte+X+zs8TZ0dam
+6TbuGMuv5rPQGAwu2JWUNOeBzXvfkg3gzk4qZrBdHtUrQjx33c1NBZddLcoSqzgC
+ph2cz4NG4Fs/Mi8SXoKBwJGVeWE+ZCBma8vFP/zctb/XroIaFSE5rAwHydwCB4gu
+VB3rNuLCoiiB50lPzAPFjjFxOuZeTZfl4bp1XRE1KKYi+n974At4HDd5g0Az8w37
+5/9G+pARCzjytvIHJTYQDsG0hfnj2Vfb5WWYF6LMib0ZGf739Yp7L602/yE9QAKm
+bifPCQARAQABiQIfBBgBAgAJBQJPjUlJAhsMAAoJEPReeXDGAbynzc4P/AomVPfY
+bY61TE+QSKAJl8/dyyw+LSddTPFTleVBFHlq1tnQmLWxoNq5t1CRXUJOv3q6haPE
+PLKR5pXXtNzAGVP74Jipa5r8FQjBG0j+XriiHmr861xyno0uPG23c0LSRqHrcLi6
+tgN2Q2ihu1Tjaql+ukzPI6u2v97FD0qhJWKvFFo64p7HTNUXHJLQ9N/m1Pien7Nm
+KFLRI0Pu0CW95I1w2gAAlS++lIxT3/ANfw6SpK9+lNBaan1g0xM5/P54MIQvZgCQ
+gdIcWdAOmXjTyMryconkeNRWpkYjXG4hZj9crP48j3lZPlUYol4pdkQ1CtSq1emv
+VDGoUrn5bRWoybOFfx3joOLpUqJA5PDjeN7YMpJNWc3O/lz+S+sW9WZY7vwbK+Mn
+E/l4Bz2k9fQDsxm2rPzM2aS/qaBo9v7vj+NE85B2/NE9cXo0WoC8u5o+KEQY6urV
+ANW/A0k94wmfoBMbmzNZ5Y5zJ9vceW9d4FE2FXaynRke2awYHBZE2Ty3MSxCQAvp
+MREQKzxB1XcR+Frj0nMKMmdEmM55OmIgAqAct1OuGDbOATJMcmVuwHqTZIdynzqh
+NPgXHx4ASqesjF/9GUrAQfOmXqHdOF6xOb7YYGssl1kgvOQRVJhkWtmTckyk+xu9
+U3Wt+q9F6O+RmemV6a6mrpog+Aq+BkIMWCJ8
+=xHbT
+-END PGP PUBLIC KEY BLOCK-

Modified: nutch/branches/branch-1.5/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5/build.xml?rev=1341570r1=1341569r2=1341570view=diff
==
--- nutch/branches/branch-1.5/build.xml (original)
+++ nutch/branches/branch-1.5/build.xml Tue May 22 17:31:35 2012
@@ -168,7 +168,6 @@
   packageset dir=${plugins.dir}/parse-swf/src/java/
   packageset dir=${plugins.dir}/parse-tika/src/java/
   packageset dir=${plugins.dir}/parse-zip/src/java/
-  packageset dir=${plugins.dir}/lib-http/src/java/
   packageset dir=${plugins.dir}/protocol-file/src/java

svn commit: r1341574 - /nutch/branches/branch-1.5/pom.xml

2012-05-22 Thread lewismc
Author: lewismc
Date: Tue May 22 17:55:07 2012
New Revision: 1341574

URL: http://svn.apache.org/viewvc?rev=1341574&view=rev
Log:
commit to bring pom.xml up-to-date for tag... generated via ant deploy task

Modified:
nutch/branches/branch-1.5/pom.xml

Modified: nutch/branches/branch-1.5/pom.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.5/pom.xml?rev=1341574r1=1341573r2=1341574view=diff
==
--- nutch/branches/branch-1.5/pom.xml (original)
+++ nutch/branches/branch-1.5/pom.xml Tue May 22 17:55:07 2012
@@ -15,34 +15,28 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; 
xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
+project xmlns=http://maven.apache.org/POM/4.0.0; 
xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance;
+xsi:schemaLocation=http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd;
 
-   modelVersion4.0.0/modelVersion
-   parent
-   groupIdorg.apache/groupId
-   artifactIdapache/artifactId
-   version9/version
-   relativePath /
-   /parent   
-   groupIdorg.apache.nutch/groupId
-   artifactIdnutch/artifactId
-   packagingjar/packaging
-   version1.5/version
-   nameApache Nutch/name
-   urlhttp://nutch.apache.org/url
-   licenses
-   license
-   nameThe Apache Software License, Version 2.0/name
-   
urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
-   distributionrepo/distribution
-   /license
-   /licenses
-   scm
-   
connectionscm:svn:http://svn.apache.org/repos/asf/nutch/trunk//connection
-   
developerConnectionscm:svn:https://svn.apache.org/repos/asf/nutch/trunk//developerConnection
-   urlhttp://svn.apache.org/viewvc/nutch/trunk//url
-   /scm
-   developers
+  modelVersion4.0.0/modelVersion
+  groupIdorg.apache.nutch/groupId
+  artifactIdnutch/artifactId
+  packagingjar/packaging
+  version1.5/version
+  nameApache Nutch/name
+  urlhttp://nutch.apache.org/url
+  licenses
+ license
+   nameThe Apache Software License, Version 2.0/name
+   urlhttp://www.apache.org/licenses/LICENSE-2.0.txt/url
+   distributionrepo/distribution
+ /license
+  /licenses
+  scm
+ urlhttp://svn.apache.org/viewvc/nutch/url
+ connectionhttp://svn.apache.org/viewvc/nutch/connection
+  /scm
+  developers
developer
idab/id
nameAndrzej Bialecki/name
@@ -57,13 +51,18 @@
idkubes/id
nameDennis Kubes/name
emailku...@apache.org/email
-   /developer
+   /developer
developer
iddogacan/id
-   nameDogacan Güney/name
+   nameDogacan Güney/name
emaildoga...@apache.org/email
/developer
developer
+idferdy/id
+nameFerdy Galema/name
+emailfe...@apache.org/email
+/developer
+   developer
idjnioche/id
nameJulien Nioche/name
emailjnio...@apache.org/email
@@ -73,228 +72,152 @@
nameSami Siren/name
emailsi...@apache.org/email
/developer
-   developer
-   idmarkus/id
-   nameMarkus Jelsma/name
-   emailmar...@apache.org/email
-   /developer
-   developer
-   idalexis/id
-   nameAlexis Detlegrode/name
-   emailale...@apache.org/email
-   /developer
-   developer
-   idlewismc/id
-   nameLewis John McGibbney/name
-   emaillewi...@apache.org/email
-   /developer
-   developer
-   idferdy/id
-   nameFerdy Galema/name
-   emailfe...@apache.org/email
-   /developer
-   /developers
-   build
-   testSourceDirectory${basedir}/src/test/testSourceDirectory
-   sourceDirectory${basedir}/src/java/sourceDirectory
-   testResources
-   testResource
-   directorysrc/testresources/directory
-   /testResource
-   testResource
-   directoryconf//directory

svn commit: r1341603 - in /nutch/trunk: CHANGES.txt build.xml

2012-05-22 Thread lewismc
Author: lewismc
Date: Tue May 22 20:07:55 2012
New Revision: 1341603

URL: http://svn.apache.org/viewvc?rev=1341603&view=rev
Log:
commit to add new ant targets to build.xml

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1341603r1=1341602r2=1341603view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue May 22 20:07:55 2012
@@ -1,5 +1,9 @@
 Nutch Change Log
 
+(trunk) Current Development:
+
+* NUTCH-XX Commit to add configuration for separation of ant distribution 
targets (lewismc + jnioche)
+
 Release 1.5 - 04/15/2012
 
 * NUTCH-1208 Don't include KEYS file in bin distribution (jnioche)

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1341603r1=1341602r2=1341603view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Tue May 22 20:07:55 2012
@@ -601,52 +601,52 @@
   !-- == --
   !----
   !-- == --
-  target name=package depends=runtime, javadoc
-mkdir dir=${dist.version.dir}/
-mkdir dir=${dist.version.dir}/lib/
-mkdir dir=${dist.version.dir}/runtime/
-mkdir dir=${dist.version.dir}/docs/
-mkdir dir=${dist.version.dir}/docs/api/
-mkdir dir=${dist.version.dir}/ivy/
+  target name=package-src depends=runtime, javadoc
+mkdir dir=${dist.version.dir}-src/
+mkdir dir=${dist.version.dir}-src/lib/
+mkdir dir=${dist.version.dir}-src/runtime/
+mkdir dir=${dist.version.dir}-src/docs/
+mkdir dir=${dist.version.dir}-src/docs/api/
+mkdir dir=${dist.version.dir}-src/ivy/
 
-copy todir=${dist.version.dir}/lib includeEmptyDirs=false
+copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false
   fileset dir=lib/
 /copy

-copy todir=${dist.version.dir}/runtime
+copy todir=${dist.version.dir}-src/runtime
   fileset dir=runtime/
 /copy
 
 chmod perm=ugo+x type=file
-fileset dir=${dist.version.dir}/runtime/deploy/bin/
-fileset dir=${dist.version.dir}/runtime/local/bin/
+fileset dir=${dist.version.dir}-src/runtime/deploy/bin/
+fileset dir=${dist.version.dir}-src/runtime/local/bin/
 /chmod
 
-copy todir=${dist.version.dir}/conf
+copy todir=${dist.version.dir}-src/conf
   fileset dir=${conf.dir} excludes=**/*.template/
 /copy
 
-copy todir=${dist.version.dir}/docs/api
+copy todir=${dist.version.dir}-src/docs/api
   fileset dir=${build.javadoc}/
 /copy
 
-copy todir=${dist.version.dir}
+copy todir=${dist.version.dir}-src
   fileset dir=.
 include name=*.txt /
 !--include name=KEYS /--
   /fileset
 /copy
 
-copy todir=${dist.version.dir}/src includeEmptyDirs=true
+copy todir=${dist.version.dir}-src/src includeEmptyDirs=true
   fileset dir=src/
 /copy
 
-copy todir=${dist.version.dir}/ivy includeEmptyDirs=true
+copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true
   fileset dir=ivy/
 /copy
 
-copy todir=${dist.version.dir}/ file=build.xml/
-copy todir=${dist.version.dir}/ file=default.properties/
+copy todir=${dist.version.dir}-src/ file=build.xml/
+copy todir=${dist.version.dir}-src/ file=default.properties/
 
   /target
 
@@ -696,7 +696,23 @@
   !-- == --
   target name=tar depends=package
 tar compression=gzip longfile=gnu
-  destfile=${dist.dir}/${final.name}.tar.gz
+  destfile=${dist.dir}/${final.name}-src.tar.gz
+  tarfileset dir=${dist.dir} mode=664
+   exclude name=${final.name}/bin/* /
+include name=${final.name}/** /
+  /tarfileset
+  tarfileset dir=${dist.dir} mode=755
+include name=${final.name}/bin/* /
+  /tarfileset
+/tar
+  /target
+  
+  !-- == --
+  !-- Make bin release tarball   
--
+  !-- == --
+  target name=tar-bin depends=package
+tar compression=gzip longfile=gnu
+  destfile=${dist.dir}/${final.name}-bin.tar.gz
   tarfileset dir=${dist.dir} mode=664
exclude name=${final.name}/bin/* /
 include name=${final.name}/** /
@@ -710,8 +726,23 @@
   !-- == --
   !-- Make release zip   --
   !-- == --
-  target name=zip depends=package
-   zip compress

svn commit: r1341609 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/nutch-default.xml default.properties

2012-05-22 Thread lewismc
Author: lewismc
Date: Tue May 22 20:18:22 2012
New Revision: 1341609

URL: http://svn.apache.org/viewvc?rev=1341609&view=rev
Log:
final commit before rolling 2.0RC

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/default.properties

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341609r1=1341608r2=1341609view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Tue May 22 20:18:22 2012
@@ -1,6 +1,8 @@
 Nutch Change Log
 
-Release nutchgora - Current Development
+Release 2.1 (22/02/2012)
+
+* NUTCH-XX Commit to add configuration for separation of ant distribution 
targets (lewismc + jnioche)
 
 * NUTCH-1364 Add a counter for malformed urls (Jason Trost via lewismc)
 

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1341609r1=1341608r2=1341609view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Tue May 22 20:18:22 2012
@@ -600,59 +600,59 @@
  /target
 
  !-- == --
- !-- D I S T R I B U T I O N --
- !-- == --
- !-- --
- !-- == --
- target name=package depends=runtime, javadoc
-  mkdir dir=${dist.version.dir} /
-  mkdir dir=${dist.version.dir}/lib /
-  mkdir dir=${dist.version.dir}/runtime /
-  mkdir dir=${dist.version.dir}/docs /
-  mkdir dir=${dist.version.dir}/docs/api /
-  mkdir dir=${dist.version.dir}/ivy /
-
-  copy todir=${dist.version.dir}/lib includeEmptyDirs=false
-   fileset dir=lib /
-  /copy
-
-  copy todir=${dist.version.dir}/runtime
-   fileset dir=runtime /
-  /copy
-
-  chmod perm=ugo+x type=file
-   fileset dir=${dist.version.dir}/runtime/deploy/bin /
-   fileset dir=${dist.version.dir}/runtime/local/bin /
-  /chmod
-
-  copy todir=${dist.version.dir}/conf
-   fileset dir=${conf.dir} excludes=**/*.template /
-  /copy
-
-  copy todir=${dist.version.dir}/docs/api
-   fileset dir=${build.javadoc} /
-  /copy
-
-  copy todir=${dist.version.dir}
-   fileset dir=.
-include name=*.txt /
-include name=KEYS /
-   /fileset
-  /copy
+  !-- D I S T R I B U T I O N--
+  !-- == --
+  !----
+  !-- == --
+  target name=package-src depends=runtime, javadoc
+mkdir dir=${dist.version.dir}-src/
+mkdir dir=${dist.version.dir}-src/lib/
+mkdir dir=${dist.version.dir}-src/runtime/
+mkdir dir=${dist.version.dir}-src/docs/
+mkdir dir=${dist.version.dir}-src/docs/api/
+mkdir dir=${dist.version.dir}-src/ivy/
 
-  copy todir=${dist.version.dir}/src includeEmptyDirs=true
-   fileset dir=src /
-  /copy
+copy todir=${dist.version.dir}-src/lib includeEmptyDirs=false
+  fileset dir=lib/
+/copy
+   
+copy todir=${dist.version.dir}-src/runtime
+  fileset dir=runtime/
+/copy
 
-  copy todir=${dist.version.dir}/ivy includeEmptyDirs=true
-   fileset dir=ivy /
-  /copy
+chmod perm=ugo+x type=file
+fileset dir=${dist.version.dir}-src/runtime/deploy/bin/
+fileset dir=${dist.version.dir}-src/runtime/local/bin/
+/chmod
 
-  copy todir=${dist.version.dir}/ file=build.xml /
-  copy todir=${dist.version.dir}/ file=default.properties /
+copy todir=${dist.version.dir}-src/conf
+  fileset dir=${conf.dir} excludes=**/*.template/
+/copy
+
+copy todir=${dist.version.dir}-src/docs/api
+  fileset dir=${build.javadoc}/
+/copy
+
+copy todir=${dist.version.dir}-src
+  fileset dir=.
+include name=*.txt /
+!--include name=KEYS /--
+  /fileset
+/copy
+
+copy todir=${dist.version.dir}-src/src includeEmptyDirs=true
+  fileset dir=src/
+/copy
+
+copy todir=${dist.version.dir}-src/ivy includeEmptyDirs=true
+  fileset dir=ivy/
+/copy
+
+copy todir=${dist.version.dir}-src/ file=build.xml/
+copy todir=${dist.version.dir}-src/ file=default.properties/
+
+  /target
 
- /target
- 
  target name=package-bin depends=runtime, javadoc
 mkdir dir=${dist.version.dir}-bin/
 mkdir dir=${dist.version.dir}-bin/lib/
@@ -694,36 +694,67 @@
 
   /target
 
- !-- == --
- !-- Make release tarball --
- !-- == --
- target name=tar depends=package

svn commit: r1341100 - in /nutch/branches/nutchgora: ./ conf/ src/java/org/apache/nutch/metadata/ src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/ src/plugin/protocol-http/src/java/org

2012-05-21 Thread lewismc
Author: lewismc
Date: Mon May 21 16:40:32 2012
New Revision: 1341100

URL: http://svn.apache.org/viewvc?rev=1341100&view=rev
Log:
commit to address NUTCH-1360 and update to CHANGES.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java

nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java

nutch/branches/nutchgora/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java

nutch/branches/nutchgora/src/plugin/protocol-sftp/src/java/org/apache/nutch/protocol/sftp/Sftp.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341100r1=1341099r2=1341100view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon May 21 16:40:32 2012
@@ -1,6 +1,9 @@
 Nutch Change Log
 
 Release nutchgora - Current Development
+
+* NUTCH-1360 Support the storing of IP address connected to when web crawling 
(lewismc)
+
 * NUTCH-1366 speed up indexing by eliminating the indexreducer (ferdy)
 
 * NUTCH-1362 Fix error handling of urls with empty fields (lewis, ferdy)

Modified: nutch/branches/nutchgora/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/nutch-default.xml?rev=1341100r1=1341099r2=1341100view=diff
==
--- nutch/branches/nutchgora/conf/nutch-default.xml (original)
+++ nutch/branches/nutchgora/conf/nutch-default.xml Mon May 21 16:40:32 2012
@@ -257,6 +257,13 @@
   /description
 /property
 
+property
+  namehttp.store.ip.address/name
+  valuefalse/value
+  descriptionEnables us to capture the specific IP address of the 
+  host which we connect to to fetch a page./description
+/property
+
 !-- FTP properties --
 
 property

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1341100r1=1341099r2=1341100view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java 
(original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/metadata/HttpHeaders.java 
Mon May 21 16:40:32 2012
@@ -46,5 +46,7 @@ public interface HttpHeaders {
   public final static String LAST_MODIFIED = Last-Modified;
 
   public final static String LOCATION = Location;
+  
+  public final static String IP_ADDRESS = _ip;
 
 }

Modified: 
nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=1341100r1=1341099r2=1341100view=diff
==
--- 
nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
 (original)
+++ 
nutch/branches/nutchgora/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
 Mon May 21 16:40:32 2012
@@ -68,8 +68,8 @@ public abstract class HttpBase implement
   /** The Nutch 'User-Agent' request header */
   protected String userAgent = getAgentString(
   NutchCVS, null, Nutch,
-  http://lucene.apache.org/nutch/bot.html;,
-  nutch-ag...@lucene.apache.org);
+  http://nutch.apache.org/bot.html;,
+  ag...@nutch.apache.org);
 
 
   /** The Accept-Language request header value. */
@@ -77,6 +77,9 @@ public abstract class HttpBase implement
   
   /** The Accept request header value. */
   protected String accept = 
text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8;
+  
+  /** The _ip request header value. */
+  protected boolean ip_header = false;
 
   /** The default logger */
   private final static Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);
@@ -117,6 +120,7 @@ public abstract class HttpBase implement
 .get(http.agent.description), conf.get(http.agent.url), 
conf.get(http.agent.email));
 this.acceptLanguage = conf.get(http.accept.language, acceptLanguage);
 this.accept = conf.get(http.accept, accept);
+this.ip_header = conf.getBoolean(http.store.ip.address, false);
 this.mimeTypes = new MimeUtil(conf);
 this.useHttp11 = conf.getBoolean(http.useHttp11, false);
 this.robots.setConf(conf);
@@ -246,6 +250,10 @@ public abstract class HttpBase implement
   public boolean getUseHttp11() {
 return useHttp11;
   }
+  
+  public boolean getIP_Header(){
+   return ip_header;
+  }
 
   private static String getAgentString(String agentName,
   String agentVersion

svn commit: r1341128 - /nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java

2012-05-21 Thread lewismc
Author: lewismc
Date: Mon May 21 17:46:15 2012
New Revision: 1341128

URL: http://svn.apache.org/viewvc?rev=1341128&view=rev
Log:
trivial commit to make logging configuration for NUTCH-1361 consistent with 
trunk

Modified:

nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java?rev=1341128r1=1341127r2=1341128view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java 
(original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java 
Mon May 21 17:46:15 2012
@@ -62,10 +62,15 @@ extends GoraMapperString, WebPage, Sele
   if (filter  filters.filter(url) == null)
 return;
 } catch (URLFilterException e) {
-  GeneratorJob.LOG.warn(Couldn't filter url:  + url +  ( + 
e.getMessage() + ));
-  return;
+  if (GeneratorJob.LOG.isWarnEnabled()) {
+GeneratorJob.LOG.warn(Couldn't filter url:  + url +  ( + 
e.getMessage() + ));
+return;
+  }
 } catch (MalformedURLException e) {
-  GeneratorJob.LOG.warn(Couldn't filter url:  + url +  ( + 
e.getMessage() +));
+  if (GeneratorJob.LOG.isWarnEnabled()) {
+GeneratorJob.LOG.warn(Couldn't filter url:  + url +  ( + 
e.getMessage() +));
+return;
+  }
 }
 
 // check fetch schedule




svn commit: r1341137 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorReducer.java

2012-05-21 Thread lewismc
Author: lewismc
Date: Mon May 21 18:25:09 2012
New Revision: 1341137

URL: http://svn.apache.org/viewvc?rev=1341137&view=rev
Log:
commit to address NUTCH-1364 and update to CHANGES.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt

nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1341137r1=1341136r2=1341137view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon May 21 18:25:09 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1364 Add a counter for malformed urls (Jason Trost via lewismc)
+
 * NUTCH-1361 Fix mishandling of malformed urls in generator job (Jason Trost 
via lewismc)
 
 * NUTCH-1360 Support the storing of IP address connected to when web crawling 
(lewismc)

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java?rev=1341137r1=1341136r2=1341137view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java 
(original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorReducer.java 
Mon May 21 18:25:09 2012
@@ -77,6 +77,7 @@ extends GoraReducerSelectorEntry, WebPa
   try {
 context.write(TableUtil.reverseUrl(key.url), page);
   } catch (MalformedURLException e) {
+   context.getCounter(Generator, MALFORMED_URL).increment(1);
 continue;
   }
   context.getCounter(Generator, GENERATE_MARK).increment(1);




svn commit: r1340546 - in /nutch/trunk: ./ src/java/org/apache/nutch/indexer/ src/plugin/creativecommons/src/web/ src/plugin/protocol-httpclient/src/test/conf/

2012-05-19 Thread lewismc
Author: lewismc
Date: Sat May 19 19:27:09 2012
New Revision: 1340546

URL: http://svn.apache.org/viewvc?rev=1340546&view=rev
Log:
trivial commit to address issues with RC

Modified:
nutch/trunk/NOTICE.txt
nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
nutch/trunk/src/plugin/creativecommons/src/web/web.xml

nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml
nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/nutch-site-test.xml

Modified: nutch/trunk/NOTICE.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/NOTICE.txt?rev=1340546r1=1340545r2=1340546view=diff
==
--- nutch/trunk/NOTICE.txt (original)
+++ nutch/trunk/NOTICE.txt Sat May 19 19:27:09 2012
@@ -1,5 +1,5 @@
 Apache Nutch
-Copyright 2009 The Apache Software Foundation
+Copyright 2012 The Apache Software Foundation
 
 This product includes software developed by The Apache Software
 Foundation (http://www.apache.org/).

Modified: 
nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1340546r1=1340545r2=1340546view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java 
Sat May 19 19:27:09 2012
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the License); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
 package org.apache.nutch.indexer;
 
 import java.util.Arrays;

Modified: nutch/trunk/src/plugin/creativecommons/src/web/web.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/src/web/web.xml?rev=1340546r1=1340545r2=1340546view=diff
==
--- nutch/trunk/src/plugin/creativecommons/src/web/web.xml (original)
+++ nutch/trunk/src/plugin/creativecommons/src/web/web.xml Sat May 19 19:27:09 
2012
@@ -1,4 +1,20 @@
 ?xml version=1.0 encoding=ISO-8859-1?
+!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the License); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an AS IS BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+--
 
 !DOCTYPE web-app
 PUBLIC -//Sun Microsystems, Inc.//DTD Web Application 2.3//EN

Modified: 
nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml?rev=1340546&r1=1340545&r2=1340546&view=diff
==
--- 
nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml
 (original)
+++ 
nutch/trunk/src/plugin/protocol-httpclient/src/test/conf/httpclient-auth-test.xml
 Sat May 19 19:27:09 2012
@@ -1,4 +1,20 @@
 ?xml version=1.0?
+!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the License); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an AS IS BASIS

svn commit: r1340553 - /nutch/branches/branch-1.5/

2012-05-19 Thread lewismc
Author: lewismc
Date: Sat May 19 20:10:33 2012
New Revision: 1340553

URL: http://svn.apache.org/viewvc?rev=1340553&view=rev
Log:
Committing a branch of trunk for the 1.5RC2

Added:
nutch/branches/branch-1.5/
  - copied from r1340552, nutch/trunk/



svn commit: r1340558 - /nutch/trunk/build.xml

2012-05-19 Thread lewismc
Author: lewismc
Date: Sat May 19 20:35:36 2012
New Revision: 1340558

URL: http://svn.apache.org/viewvc?rev=1340558&view=rev
Log:
trivial commit to improve Javadoc

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1340558&r1=1340557&r2=1340558&view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Sat May 19 20:35:36 2012
@@ -149,26 +149,46 @@
 arg value=${javadoc.proxy.port}/
 
   packageset dir=${src.dir}/
+  packageset dir=${plugins.dir}/creativecommons/src/java/
+  packageset dir=${plugins.dir}/feed/src/java/
+  packageset dir=${plugins.dir}/headings/src/java/
+  packageset dir=${plugins.dir}/index-anchor/src/java/
+  packageset dir=${plugins.dir}/index-basic/src/java/
+  packageset dir=${plugins.dir}/index-metadata/src/java/
+  packageset dir=${plugins.dir}/index-more/src/java/
+  packageset dir=${plugins.dir}/index-static/src/java/
+  packageset dir=${plugins.dir}/language-identifier/src/java/
   packageset dir=${plugins.dir}/lib-http/src/java/
   packageset dir=${plugins.dir}/lib-regex-filter/src/java/
   packageset dir=${plugins.dir}/microformats-reltag/src/java/
-  packageset dir=${plugins.dir}/protocol-file/src/java/
-  packageset dir=${plugins.dir}/protocol-ftp/src/java/
-  packageset dir=${plugins.dir}/protocol-http/src/java/
-  packageset dir=${plugins.dir}/protocol-httpclient/src/java/
-  packageset dir=${plugins.dir}/parse-tika/src/java/
   packageset dir=${plugins.dir}/parse-ext/src/java/
+  packageset dir=${plugins.dir}/parse-html/src/java/
   packageset dir=${plugins.dir}/parse-js/src/java/
+  packageset dir=${plugins.dir}/parse-metatags/src/java/
   packageset dir=${plugins.dir}/parse-swf/src/java/
+  packageset dir=${plugins.dir}/parse-tika/src/java/
   packageset dir=${plugins.dir}/parse-zip/src/java/
-  packageset dir=${plugins.dir}/index-basic/src/java/
-  packageset dir=${plugins.dir}/index-more/src/java/
+  packageset dir=${plugins.dir}/lib-http/src/java/
+  packageset dir=${plugins.dir}/protocol-file/src/java/
+  packageset dir=${plugins.dir}/protocol-ftp/src/java/
+  packageset dir=${plugins.dir}/protocol-http/src/java/
+  packageset dir=${plugins.dir}/protocol-httpclient/src/java/
+  packageset dir=${plugins.dir}/scoring-link/src/java/
   packageset dir=${plugins.dir}/scoring-opic/src/java/
+  packageset dir=${plugins.dir}/subcollection/src/java/
+  packageset dir=${plugins.dir}/tld/src/java/
   packageset dir=${plugins.dir}/urlfilter-automaton/src/java/
+  packageset dir=${plugins.dir}/urlfilter-domain/src/java/
+  packageset dir=${plugins.dir}/urlfilter-domainblacklist/src/java/
+  packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
   packageset dir=${plugins.dir}/urlfilter-regex/src/java/
   packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
-  packageset dir=${plugins.dir}/creativecommons/src/java/
-  packageset dir=${plugins.dir}/language-identifier/src/java/
+  packageset dir=${plugins.dir}/urlfilter-suffix/src/java/
+  packageset dir=${plugins.dir}/urlfilter-validator/src/java/
+  packageset dir=${plugins.dir}/urlmeta/src/java/
+  packageset dir=${plugins.dir}/urlnormalizer-basic/src/java/
+  packageset dir=${plugins.dir}/urlnormalizer-pass/src/java/
+  packageset dir=${plugins.dir}/urlnormalizer-regex/src/java/
   
   link href=${javadoc.link.java}/
   link href=${javadoc.link.lucene}/
@@ -507,26 +527,46 @@
 arg value=${javadoc.proxy.port}/
 
   packageset dir=${src.dir}/
+  packageset dir=${plugins.dir}/creativecommons/src/java/
+  packageset dir=${plugins.dir}/feed/src/java/
+  packageset dir=${plugins.dir}/headings/src/java/
+  packageset dir=${plugins.dir}/index-anchor/src/java/
+  packageset dir=${plugins.dir}/index-basic/src/java/
+  packageset dir=${plugins.dir}/index-metadata/src/java/
+  packageset dir=${plugins.dir}/index-more/src/java/
+  packageset dir=${plugins.dir}/index-static/src/java/
+  packageset dir=${plugins.dir}/language-identifier/src/java/
   packageset dir=${plugins.dir}/lib-http/src/java/
   packageset dir=${plugins.dir}/lib-regex-filter/src/java/
   packageset dir=${plugins.dir}/microformats-reltag/src/java/
-  packageset dir=${plugins.dir}/protocol-file/src/java/
-  packageset dir=${plugins.dir}/protocol-ftp/src/java/
-  packageset dir=${plugins.dir}/protocol-http/src/java/
-  packageset dir=${plugins.dir}/protocol-httpclient/src/java/
-  packageset dir=${plugins.dir}/parse-tika/src/java/
   packageset dir=${plugins.dir}/parse-ext/src/java/
+  packageset dir=${plugins.dir}/parse-html/src/java/
   packageset dir=${plugins.dir}/parse-js/src/java

svn commit: r1302134 - in /nutch/trunk: build.xml default.properties src/java/overview.html

2012-03-18 Thread lewismc
Author: lewismc
Date: Sun Mar 18 15:02:50 2012
New Revision: 1302134

URL: http://svn.apache.org/viewvc?rev=1302134&view=rev
Log:
incremental commit to update Javadocs

Modified:
nutch/trunk/build.xml
nutch/trunk/default.properties
nutch/trunk/src/java/overview.html

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1302134&r1=1302133&r2=1302134&view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Sun Mar 18 15:02:50 2012
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=Nutch default=runtime xmlns:ivy=antlib:org.apache.ivy.ant 
xmlns:artifact=antlib:org.apache.maven.artifact.ant
+project name=${name} default=runtime 
xmlns:ivy=antlib:org.apache.ivy.ant 
xmlns:artifact=antlib:org.apache.maven.artifact.ant
 
   !-- Load all the default properties, and any the user wants--
   !-- to contribute (without having to type -D or edit this file --
@@ -125,7 +125,7 @@
   !-- == --
   !----
   !-- == --  
-  target name=release depends=compile-core description=-- generate the 
release distribution
+  target name=release depends=compile-core description=generate the 
release distribution
 copy file=${conf.dir}/nutch-default.xml
   todir=${build.classes}/
 copy file=${conf.dir}/nutch-site.xml
@@ -141,8 +141,8 @@
   author=true
   version=true
   use=true
-  windowtitle=${Name} ${version} API
-  doctitle=${Name} ${version} API
+  windowtitle=${name} ${version} API
+  doctitle=${name} ${version} API
   bottom=Copyright amp;copy; ${year} The Apache Software Foundation
   
 arg value=${javadoc.proxy.host}/
@@ -205,7 +205,7 @@
   !-- == --
   !----
   !-- == --  
-  target name=deploy depends=release description=-- deploy to Apache 
Nexus
+  target name=deploy depends=release description=deploy to Apache Nexus

!-- generate a pom file --
ivy:makepom ivyfile=${ivy.file} pomfile=${basedir}/pom.xml 
templatefile=ivy/mvn.template
@@ -460,7 +460,7 @@
   /target
 
   !-- target: ivy-download   --
-  target name=ivy-download description=-- download ivy
+  target name=ivy-download description=Download ivy
 available file=${ivy.jar} property=ivy.jar.found/
 antcall target=-ivy-download-unchecked/
   /target
@@ -499,8 +499,8 @@
   author=true
   version=true
   use=true
-  windowtitle=${Name} ${version} API
-  doctitle=${Name} ${version} API
+  windowtitle=${name} ${version} API
+  doctitle=${name} ${version} API
   bottom=Copyright amp;copy; ${year} The Apache Software Foundation
   
 arg value=${javadoc.proxy.host}/
@@ -673,7 +673,7 @@
 
   !-- target: clean-cache  = --
   target name=clean-cache depends=
-description=-- delete ivy cache
+description=delete ivy cache
 ivy:cleancache /
   /target
 
@@ -693,7 +693,7 @@
   /target
 
   target name=rat-sources depends=rat-sources-typedef
- description=-- runs the tasks over src/java
+ description=runs the tasks over src/java
 rat:report xmlns:rat=antlib:org.apache.rat.anttasks
   fileset dir=src
include name=java/**/*/

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1302134&r1=1302133&r2=1302134&view=diff
==
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Sun Mar 18 15:02:50 2012
@@ -1,7 +1,7 @@
 name=nutch
 version=1.5-SNAPSHOT
 final.name=${name}-${version}
-year=2011
+year=2012
 
 basedir = ./
 src.dir = ./src/java
@@ -78,31 +78,59 @@ plugins.protocol=\
 #
 plugins.urlfilter=\
org.apache.nutch.urlfilter.automaton*:\
+   org.apache.nutch.urlfilter.domain*:\
org.apache.nutch.urlfilter.prefix*:\
-   org.apache.nutch.urlfilter.regex*
+   org.apache.nutch.urlfilter.regex*\
+   org.apache.nutch.urlfilter.suffix*:\
+   org.apache.nutch.urlfilter.validator*
+
+#
+# URL Normalizer Plugins
+#
+plugins.urlfilter=\
+   org.apache.nutch.net.urlnormalizer.basic*:\
+   org.apache.nutch.net.urlnormalizer.pass*:\
+   org.apache.nutch.net.urlnormalizer.regex*
 
 #
 # Scoring Plugins
 #
 plugins.scoring=\
-   org.apache.nutch.scoring.opic*
-
+   org.apache.nutch.scoring.link

svn commit: r1302136 - in /nutch/branches/nutchgora: default.properties src/java/overview.html

2012-03-18 Thread lewismc
Author: lewismc
Date: Sun Mar 18 15:03:43 2012
New Revision: 1302136

URL: http://svn.apache.org/viewvc?rev=1302136&view=rev
Log:
incremental update to Javadoc

Modified:
nutch/branches/nutchgora/default.properties
nutch/branches/nutchgora/src/java/overview.html

Modified: nutch/branches/nutchgora/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/default.properties?rev=1302136&r1=1302135&r2=1302136&view=diff
==
--- nutch/branches/nutchgora/default.properties (original)
+++ nutch/branches/nutchgora/default.properties Sun Mar 18 15:03:43 2012
@@ -112,7 +112,7 @@ plugins.parse=\
org.apache.nutch.parse.swf*:\
org.apache.nutch.parse.tika:\
org.apache.nutch.parse.zip
-
+   
 #
 # Indexing Filter Plugins
 #

Modified: nutch/branches/nutchgora/src/java/overview.html
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/overview.html?rev=1302136&r1=1302135&r2=1302136&view=diff
==
--- nutch/branches/nutchgora/src/java/overview.html (original)
+++ nutch/branches/nutchgora/src/java/overview.html Sun Mar 18 15:03:43 2012
@@ -1,9 +1,10 @@
 html
 head
-   titleNutch/title
+   titleApache Nutch/title
 /head
 body
-Nutch is the open-source search engine.p
+pApache Nutch is an open source web-search software project. /p
+pNutch is a project of the Apache Software Foundation and is part of the 
larger Apache community of developers and users./p
 /body
 /html
 




svn commit: r1302138 - /nutch/trunk/default.properties

2012-03-18 Thread lewismc
Author: lewismc
Date: Sun Mar 18 15:08:41 2012
New Revision: 1302138

URL: http://svn.apache.org/viewvc?rev=1302138&view=rev
Log:
incremental update to Javadoc

Modified:
nutch/trunk/default.properties

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1302138&r1=1302137&r2=1302138&view=diff
==
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Sun Mar 18 15:08:41 2012
@@ -79,6 +79,7 @@ plugins.protocol=\
 plugins.urlfilter=\
org.apache.nutch.urlfilter.automaton*:\
org.apache.nutch.urlfilter.domain*:\
+   org.apache.nutch.urlfilter.domainblacklist*:\
org.apache.nutch.urlfilter.prefix*:\
org.apache.nutch.urlfilter.regex*\
org.apache.nutch.urlfilter.suffix*:\




svn commit: r1302161 - in /nutch/trunk/src: java/org/apache/nutch/crawl/ java/org/apache/nutch/parse/ java/org/apache/nutch/plugin/ java/org/apache/nutch/protocol/ java/org/apache/nutch/segment/ java/

2012-03-18 Thread lewismc
Author: lewismc
Date: Sun Mar 18 16:46:33 2012
New Revision: 1302161

URL: http://svn.apache.org/viewvc?rev=1302161&view=rev
Log:
commit to address NUTCH-1273

Modified:
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java

nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=1302161&r1=1302160&r2=1302161&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Sun Mar 18 
16:46:33 2012
@@ -262,7 +262,7 @@ public class CrawlDatum implements Writa
 if (version  3) {
   boolean hasMetadata = false;
   if (version  7) {
-MapWritable oldMetaData = new MapWritable();
+org.apache.hadoop.io.MapWritable oldMetaData = new 
org.apache.hadoop.io.MapWritable();
 if (in.readBoolean()) {
   hasMetadata = true;
   metaData = new org.apache.hadoop.io.MapWritable();

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=1302161&r1=1302160&r2=1302161&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Sun Mar 18 
16:46:33 2012
@@ -19,6 +19,7 @@ package org.apache.nutch.crawl;
 
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.io.Closeable;
 import java.net.URL;
 import java.util.Date;
 import java.util.Iterator;
@@ -35,7 +36,6 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Closeable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.MapFile;

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=1302161&r1=1302160&r2=1302161&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Sun Mar 18 
16:46:33 2012
@@ -37,6 +37,7 @@ import org.apache.nutch.util.TimingUtil;
 
 import java.text.SimpleDateFormat;
 import java.util.Iterator;
+import java.io.Closeable;
 
 /** . */
 public class LinkDbReader extends Configured implements Tool, Closeable {

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=1302161&r1=1302160&r2=1302161&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java Sun Mar 18 
16:46:33 2012
@@ -31,12 +31,12 @@ public class NutchWritable extends Gener
   org.apache.hadoop.io.BytesWritable.class,
   org.apache.hadoop.io.FloatWritable.class,
   org.apache.hadoop.io.IntWritable.class,
+  org.apache.hadoop.io.MapWritable.class,
   org.apache.hadoop.io.Text.class,
   org.apache.hadoop.io.MD5Hash.class,
   org.apache.nutch.crawl.CrawlDatum.class,
   org.apache.nutch.crawl.Inlink.class,
   org.apache.nutch.crawl.Inlinks.class,
-  org.apache.nutch.crawl.MapWritable.class,
   org.apache.nutch.fetcher.FetcherOutput.class,
   org.apache.nutch.metadata.Metadata.class,
   org.apache.nutch.parse.Outlink.class,

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=1302161&r1=1302160&r2=1302161&view=diff
==
--- nutch/trunk/src/java/org/apache

svn commit: r1302172 - in /nutch/branches/nutchgora/src: java/org/apache/nutch/plugin/ java/org/apache/nutch/protocol/ plugin/protocol-file/src/java/org/apache/nutch/protocol/file/

2012-03-18 Thread lewismc
Author: lewismc
Date: Sun Mar 18 17:23:13 2012
New Revision: 1302172

URL: http://svn.apache.org/viewvc?rev=1302172&view=rev
Log:
rollback to -r1302136

Modified:

nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java

nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java
nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java

nutch/branches/nutchgora/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=1302172&r1=1302171&r2=1302172&view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java 
(original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginDescriptor.java 
Sun Mar 18 17:23:13 2012
@@ -20,7 +20,6 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Locale;
@@ -215,16 +214,12 @@ public class PluginDescriptor {
 
   /**
* Adds a exported library with a relative path to the plugin directory.
-   * We wish to automatically escape characters that are illegal in URLs. 
-   * It is recommended that new code convert an abstract pathname into a URL 
-   * by first converting it into a URI, via the toURI method, and then 
-   * converting the URI into a URL via the URI.toURL method
* 
* @param pLibPath
*/
   public void addExportedLibRelative(String pLibPath)
   throws MalformedURLException {
-URL url = new File(getPluginPath() + File.separator + 
pLibPath).toURI().toURL();
+URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
 fExportedLibs.add(url);
   }
 
@@ -247,17 +242,13 @@ public class PluginDescriptor {
   }
 
   /**
-   * Adds a exported library with a relative path to the plugin directory.
-   * We wish to automatically escape characters that are illegal in URLs. 
-   * It is recommended that new code convert an abstract pathname into a URL 
-   * by first converting it into a URI, via the toURI method, and then 
-   * converting the URI into a URL via the URI.toURL method
+   * Adds a not exported library with a plugin directory relative path.
* 
* @param pLibPath
*/
   public void addNotExportedLibRelative(String pLibPath)
   throws MalformedURLException {
-URL url = new File(getPluginPath() + File.separator + 
pLibPath).toURI().toURL();
+URL url = new File(getPluginPath() + File.separator + pLibPath).toURL();
 fNotExportedLibs.add(url);
   }
 
@@ -288,7 +279,7 @@ public class PluginDescriptor {
 try {
   for (File file2 : file.listFiles()) {
 if (file2.getAbsolutePath().endsWith(properties))
-  arrayList.add(file2.getParentFile().toURI().toURL());
+  arrayList.add(file2.getParentFile().toURL());
   }
 } catch (MalformedURLException e) {
   LOG.debug(getPluginId() +   + e.toString());

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=1302172&r1=1302171&r2=1302172&view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java
 (original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/plugin/PluginManifestParser.java
 Sun Mar 18 17:23:13 2012
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.net.URI;
 import java.net.URLDecoder;
 import java.util.HashMap;
 import java.util.Map;
@@ -148,7 +147,7 @@ public class PluginManifestParser {
   private PluginDescriptor parseManifestFile(String pManifestPath)
   throws MalformedURLException, SAXException, IOException,
   ParserConfigurationException {
-Document document = parseXML(new File(pManifestPath).toURI().toURL());
+Document document = parseXML(new File(pManifestPath).toURL());
 String pPath = new File(pManifestPath).getParent();
 return parsePlugin(document, pPath);
   }

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java?rev=1302172&r1=1302171&r2=1302172&view=diff
==
--- nutch/branches/nutchgora/src/java/org/apache/nutch/protocol/Content.java 
(original)
+++ nutch/branches/nutchgora

svn commit: r1298437 - in /nutch/branches/nutchgora: CHANGES.txt build.xml

2012-03-08 Thread lewismc
Author: lewismc
Date: Thu Mar  8 15:47:37 2012
New Revision: 1298437

URL: http://svn.apache.org/viewvc?rev=1298437&view=rev
Log:
commit to address NUTCH-1307 and update to CHANGES.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1298437&r1=1298436&r2=1298437&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Mar  8 15:47:37 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1307 Improve formatting of ant targets for clearer project help 
(lewismc)
+
 * NUTCH-1302 nutchgora job failures should be noticed by submitter (ferdy)
 
 * NUTCH-1298 Pass numTasks to FetcherJob (Dan Rosher via ferdy)

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1298437&r1=1298436&r2=1298437&view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Thu Mar  8 15:47:37 2012
@@ -124,7 +124,7 @@
  !-- --
  !-- == --
  target name=release depends=compile-core
-  description=generate the release distribution
+  description=-- generate the release distribution
   copy file=${conf.dir}/nutch-default.xml todir=${build.classes} /
   copy file=${conf.dir}/nutch-site.xml todir=${build.classes} /
 
@@ -201,7 +201,7 @@
  !-- == --
  !-- --
  !-- == --
- target name=deploy depends=release description=deploy to Apache Nexus
+ target name=deploy depends=release description=-- deploy to Apache 
Nexus
 
   !-- generate a pom file --
   ivy:makepom ivyfile=${ivy.file} pomfile=${basedir}/pom.xml
@@ -459,7 +459,7 @@
  /target
 
  !-- target: ivy-download  --
- target name=ivy-download description=Download ivy
+ target name=ivy-download description=-- Download ivy
   available file=${ivy.jar} property=ivy.jar.found /
   antcall target=-ivy-download-unchecked /
  /target
@@ -669,7 +669,7 @@
  /target
 
  !-- target: clean-cache = --
- target name=clean-cache depends= description=delete ivy cache
+ target name=clean-cache depends= description=-- delete ivy cache
   ivy:cleancache /
  /target
 
@@ -689,7 +689,7 @@
  /target
 
  target name=rat-sources depends=rat-sources-typedef
-  description=runs the tasks over src/java
+  description=-- runs the tasks over src/java
   rat:report xmlns:rat=antlib:org.apache.rat.anttasks
fileset dir=src
 include name=java/**/* /




svn commit: r1298444 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/crawl/GeneratorMapper.java

2012-03-08 Thread lewismc
Author: lewismc
Date: Thu Mar  8 15:53:37 2012
New Revision: 1298444

URL: http://svn.apache.org/viewvc?rev=1298444&view=rev
Log:
commit to address NUTCH-1304 and update to CHANGES.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt

nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1298444&r1=1298443&r2=1298444&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Mar  8 15:53:37 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1304 GeneratorMapper.java dosen't return when skipping and already 
generated mark (Dan Rosher via lewismc)
+
 * NUTCH-1307 Improve formatting of ant targets for clearer project help 
(lewismc)
 
 * NUTCH-1302 nutchgora job failures should be noticed by submitter (ferdy)

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java?rev=1298444&r1=1298443&r2=1298444&view=diff
==
--- 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java 
(original)
+++ 
nutch/branches/nutchgora/src/java/org/apache/nutch/crawl/GeneratorMapper.java 
Thu Mar  8 15:53:37 2012
@@ -50,6 +50,7 @@ extends GoraMapperString, WebPage, Sele
   if (GeneratorJob.LOG.isDebugEnabled()) {
 GeneratorJob.LOG.debug(Skipping  + url + ; already generated);
   }
+  return;
 }
 
 // If filtering is on don't generate URLs that don't pass URLFilters




svn commit: r1291030 - in /nutch/trunk/src: java/org/apache/nutch/crawl/MapWritable.java java/org/apache/nutch/net/protocols/ProtocolException.java java/org/apache/nutch/parse/OutlinkExtractor.java te

2012-02-19 Thread lewismc
Author: lewismc
Date: Sun Feb 19 18:19:36 2012
New Revision: 1291030

URL: http://svn.apache.org/viewvc?rev=1291030&view=rev
Log:
trivial commit to address NUTCH-1276

Modified:
nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java
nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=1291030&r1=1291029&r2=1291030&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Sun Feb 19 
18:19:36 2012
@@ -59,6 +59,8 @@ import org.apache.nutch.protocol.Protoco
  * @author Stefan Groschupf
  * @deprecated Use org.apache.hadoop.io.MapWritable instead.
  */
+ 
+@Deprecated
 public class MapWritable implements Writable {
 
   public static final Logger LOG = LoggerFactory.getLogger(MapWritable.class);

Modified: 
nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java?rev=1291030&r1=1291029&r2=1291030&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/net/protocols/ProtocolException.java 
Sun Feb 19 18:19:36 2012
@@ -23,6 +23,7 @@ import java.io.Serializable;
  * Base exception for all protocol handlers
  * @deprecated Use {@link org.apache.nutch.protocol.ProtocolException} instead.
  */
+@Deprecated
 @SuppressWarnings(serial)
 public class ProtocolException extends Exception implements Serializable {
 

Modified: nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java?rev=1291030&r1=1291029&r2=1291030&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java Sun Feb 
19 18:19:36 2012
@@ -143,6 +143,7 @@ public class OutlinkExtractor {
* @return Array of codeOutlink/code s within found in plainText
* @deprecated only for tests
*/
+  @Deprecated
   private Outlink[] getOutlinksJakartaRegexpImpl(final String plainText) {
 
 throw new UnsupportedOperationException(
@@ -200,6 +201,7 @@ public class OutlinkExtractor {
* @return Array of codeOutlink/code s within found in plainText
* @deprecated only for tests
*/
+  @Deprecated
   private Outlink[] getOutlinksJDK5Impl(final String plainText) {
 
 throw new UnsupportedOperationException(

Modified: nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java?rev=1291030&r1=1291029&r2=1291030&view=diff
==
--- nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java Sun Feb 19 
18:19:36 2012
@@ -74,6 +74,7 @@ public class CrawlDBTestUtil {
* @return
* @deprecated Use {@link #createConfiguration()} instead
*/
+  @Deprecated
   public static Configuration create(){
 return createConfiguration();
   }




svn commit: r1231517 - in /nutch/trunk/src: java/org/apache/nutch/crawl/ java/org/apache/nutch/indexer/solr/ java/org/apache/nutch/tools/arc/ java/org/apache/nutch/util/ java/org/apache/nutch/util/dom

2012-01-14 Thread lewismc
Author: lewismc
Date: Sat Jan 14 15:45:46 2012
New Revision: 1231517

URL: http://svn.apache.org/viewvc?rev=1231517&view=rev
Log:
commit to try and resolve NUTCH-1176, I expect this not to work 1st time, N.B. 
This doesn't change or even touch syntax of code.

Modified:
nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java
nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java
nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java
nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java
nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java

nutch/trunk/src/plugin/lib-regex-filter/src/java/org/apache/nutch/urlfilter/api/RegexURLFilterBase.java

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1231517&r1=1231516&r2=1231517&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java 
(original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Sat 
Jan 14 15:45:46 2012
@@ -64,6 +64,7 @@ public abstract class AbstractFetchSched
* default codefetchInterval/code.
* 
* @param url URL of the page.
+   *
* @param datum datum instance to be initialized (modified in place).
*/
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum) {
@@ -91,12 +92,15 @@ public abstract class AbstractFetchSched
* marked as GONE. Default implementation increases fetchInterval by 50%,
* and if it exceeds the codemaxInterval/code it calls
* {@link #forceRefetch(Text, CrawlDatum, boolean)}.
-   * @param url URL of the page
-   * @param datum datum instance to be adjusted
+   *
+   * @param url URL of the page.
+   *
+   * @param datum datum instance to be adjusted.
+   *
* @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
+   * NOTE: this may be a different instance than {@see datum}, but
* implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   * information from {@see datum}.
*/
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
   long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -113,15 +117,21 @@ public abstract class AbstractFetchSched
* re-tried due to transient errors. The default implementation
* sets the next fetch time 1 day in the future and increases
* the retry counter.
-   * @param url URL of the page
-   * @param datum page information
-   * @param prevFetchTime previous fetch time
-   * @param prevModifiedTime previous modified time
-   * @param fetchTime current fetch time
+   *
+   * @param url URL of the page.
+   *
+   * @param datum page information.
+   *
+   * @param prevFetchTime previous fetch time.
+   *
+   * @param prevModifiedTime previous modified time.
+   *
+   * @param fetchTime current fetch time.
+   *
* @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@param datum}, but
+   * NOTE: this may be a different instance than {@see datum}, but
* implementations should make sure that it contains at least all
-   * information from {@param datum}.
+   * information from {@see datum}.
*/
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
   long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -147,10 +157,14 @@ public abstract class AbstractFetchSched
* {@param curTime} it returns false, and true otherwise. It will also
* check that fetchTime is not too remote (more than 
codemaxInterval/code,
* in which case it lowers the interval and returns true.
-   * @param url URL of the page
-   * @param datum datum instance
+   *
+   * @param url URL of the page.
+   *
+   * @param datum datum instance.
+   *
* @param curTime reference time (usually set to the time when the
* fetchlist generation process was started).
+   *
* @return true, if the page should be considered for inclusion in the 
current
* fetchlist, otherwise false.
*/
@@ -173,8 +187,11 @@ public abstract class AbstractFetchSched
   /**
* This method resets fetchTime, fetchInterval, modifiedTime,
* retriesSinceFetch and page signature, so that it forces refetching.
-   * @param url URL of the page
-   * @param datum datum instance
+   *
+   * @param url URL of the page.
+   *
+   * @param datum datum instance

svn commit: r1227620 - in /nutch/trunk: CHANGES.txt build.xml default.properties

2012-01-05 Thread lewismc
Author: lewismc
Date: Thu Jan  5 15:00:42 2012
New Revision: 1227620

URL: http://svn.apache.org/viewvc?rev=1227620&view=rev
Log:
commit to address NUTCH-1237 & update to CHANGES.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml
nutch/trunk/default.properties

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1227620r1=1227619r2=1227620view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Jan  5 15:00:42 2012
@@ -1,5 +1,9 @@
 Nutch Change Log
 
+* NUTCH-1237 Improve javac arguments for more verbose output (lewismc)
+
+* NUTCH-1236 Add link to site documentation to download older versions of 
Nutch (lewismc)
+
 * NUTCH-1146 Prevent generation of _SUCCESS files in output (jnioche)
 
 * NUTCH-1232 Remove site field from index-basic (markus)

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1227620r1=1227619r2=1227620view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Thu Jan  5 15:00:42 2012
@@ -94,6 +94,7 @@
  target=${javac.version}
  source=${javac.version}
  deprecation=${javac.deprecation}
+  compilerarg value=-Xlint/
   classpath refid=classpath/
 /javac
   /target
@@ -318,6 +319,7 @@
  target=${javac.version}
  source=${javac.version}
  deprecation=${javac.deprecation}
+  compilerarg value=-Xlint/
   classpath refid=test.classpath/
 /javac
   /target

Modified: nutch/trunk/default.properties
URL: 
http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=1227620r1=1227619r2=1227620view=diff
==
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Thu Jan  5 15:00:42 2012
@@ -36,7 +36,7 @@ dist.version.dir=${dist.dir}/${final.nam
 
 javac.debug=on
 javac.optimize=on
-javac.deprecation=off
+javac.deprecation=on
 javac.version= 1.6
 
 runtime.dir=./runtime




svn commit: r1226800 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/images/ publish/skin/images/

2012-01-03 Thread lewismc
Author: lewismc
Date: Tue Jan  3 15:04:30 2012
New Revision: 1226800

URL: http://svn.apache.org/viewvc?rev=1226800&view=rev
Log:
commit to add an old downloads page, a sonar analysis page and update to 
site.xml to accommodate the changes.

Added:
nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml
nutch/site/forrest/src/documentation/content/xdocs/sonar.xml
Modified:
nutch/site/forrest/src/documentation/content/xdocs/site.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/faq.html
nutch/site/publish/faq.pdf
nutch/site/publish/images/built-with-forrest-button.png
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/built-with-forrest-button.png
nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png

nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/tutorial.html
nutch/site/publish/tutorial.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf
nutch/site/publish/wiki.html
nutch/site/publish/wiki.pdf

Added: nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml?rev=1226800view=auto
==
--- nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml (added)
+++ nutch/site/forrest/src/documentation/content/xdocs/old_downloads.xml Tue 
Jan  3 15:04:30 2012
@@ -0,0 +1,35 @@
+?xml version=1.0?
+!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the License); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an AS IS BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--
+!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN 
+  http://forrest.apache.org/dtd/document-v20.dtd;
+
+document 
+
+header
+  titleOlder Downloads/title 
+/header 
+
+body 
+
+section
+  titleOld Downloads/title pAll old Nutch downloads can be found a 
href=http://archive.apache.org/dist/nutch/;here/a. Please note that the 
versions available here for download are now not configured for general release 
and are unsupported, in addition it is always recommended by the Nutch 
development team to use the most recent stable release./p 
+/section
+
+/body
+
+/document

Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1226800r1=1226799r2=1226800view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Jan  3 
15:04:30 2012
@@ -54,9 +54,11 @@ See http://forrest.apache.org/docs/linki
   resources label=Resources
 downloadlabel=Download href=ext:release /
 nightly label=Nightly builds   href=nightly.html /
+sonar   label=Sonar Analysis   href=sonar.html /
 contact label=Mailing Listshref=mailing_lists.html /
 issues  label=Issue Tracking   href=issue_tracking.html

svn commit: r1224750 - in /nutch/branches/nutchgora: CHANGES.txt NOTICE.txt

2011-12-26 Thread lewismc
Author: lewismc
Date: Mon Dec 26 16:17:42 2011
New Revision: 1224750

URL: http://svn.apache.org/viewvc?rev=1224750&view=rev
Log:
commit to address NUTCH-1217 and update to CHANGES.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/NOTICE.txt

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1224750r1=1224749r2=1224750view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Dec 26 16:17:42 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1217 Update NOTICE.txt to drop some copyrights (lewismc)
+
 * NUTCH-1216 Add trivial comment to lib/native/README.txt (lewismc)
 
 * NUTCH-1198 Less verbose logging when unmapped mimetypes are trying to be 
parsed. (ferdy)

Modified: nutch/branches/nutchgora/NOTICE.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/NOTICE.txt?rev=1224750r1=1224749r2=1224750view=diff
==
--- nutch/branches/nutchgora/NOTICE.txt (original)
+++ nutch/branches/nutchgora/NOTICE.txt Mon Dec 26 16:17:42 2011
@@ -6,70 +6,10 @@ Foundation (http://www.apache.org/).
 
 This product includes software developed by the following copyright owners:
 
-Nutch includes icu4j:
-Copyright (c) 1995-2006 International Business Machines Corporation and
-others
-
-Nutch includes Carrot2:
-Copyright (C) 2002-2006, Dawid Weiss, Stanisław Osiński.
-Dawid Weiss; Project administrator, various components, core; 2002; Poland
-Stanisław, Osiński; Lingo clustering component, ODP Input; 2003; Poland
-Karol Gołembniak, Irmina Masłowska; HAOG clustering component; 2006; Poznan 
University of Technology; Poland
-Michał, Wróblewski [*]; AHC clustering components; 2003; Poznan University 
of Technology, Poland
-Paweł, Kowalik [*]; Inductive search engine wrapper; 2003; Poznan University 
of Technology, Poland
-Steven, Schockaert [*]; Fuzzy Ants clustering component; 2004; University of 
Gent, Belgium
-Lang, Ngo Chi [*]; Fuzzy Rough set clustering component; 2004; Warsaw 
University, Poland
-
-Nutch includes Saxpath:
-Copyright (C) 2000-2002 werken digital. All rights reserved.
-
-Nutch includes jaxen:
-Copyright 2003-2006 The Werken Company. All Rights Reserved.
-
-Nutch includes Jdom:
-Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin.
-All rights reserved
-
-Nutch includes SaxPath:
-Copyright (C) 2000-2002 werken digital. All rights reserved.
- 
-Nutch includes Snowball:
-Copyright (c) 2001, Dr Martin Porter
-(for the Java developments) Copyright (c) 2002, Richard Boulton. 
-
-Nutch includes ViolinStrings:
-Copyright (c) Michael Schmeling 1998, 2000 - All Rights Reserve
-
-Nutch includes Cyperneko:
-(C) Copyright 2002,2003, Andy Clark.  All rights reserved.
-
-Nutch includes Jena:
-(c) Copyright 2000, 2001, 2002, 2003, 2004 Hewlett-Packard Development 
Company, LP
-All rights reserved.
-
-Nutch includes BouncyCastle:
-Copyright (c) 2000 - 2008 The Legion Of The Bouncy Castle 
(http://www.bouncycastle.org)
-
-Nutch includes FontBox:
-Copyright (c) 2003-2005, www.fontbox.org
-
-Nutch includes JempBox:
-Copyright (c) 2006-2007, www.jempbox.org
-All rights reserved.
-
-Nutch includes PDFBox:
-Copyright (c) 2003-2005, www.pdfbox.org
-All rights reserved.
-
 Nutch includes JavaSWF:
 Copyright (c) 2001-2005, David N. Main, All rights reserved.
 
-Nutch includes Json Lib:
-This product includes software developed by Douglas Crockford 
-(http://www.crockford.com).
-
 Nutch includes Automaton:
 This package is Copyright © 2001-2008 Anders Møller. All rights reserved.
 
-Nutch includes Rome:
-Copyright 2004 Sun Microsystems, Inc. 
+




svn commit: r1199860 - in /nutch/branches/nutchgora: build.xml doap.rdf

2011-11-09 Thread lewismc
Author: lewismc
Date: Wed Nov  9 17:06:46 2011
New Revision: 1199860

URL: http://svn.apache.org/viewvc?rev=1199860&view=rev
Log:
commit to update the doap file and to configure a unique key for Sonar ant task.

Modified:
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/doap.rdf

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1199860r1=1199859r2=1199860view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Wed Nov  9 17:06:46 2011
@@ -695,7 +695,7 @@
property name=sonar.binaries value=${build.dir}/plugins /
property name=sonar.tests value=${test.src.dir} /
 
-   sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch
+   sonar:sonar workDir=${base.dir} key=org.apache.nutch:branch
version=2.0-SNAPSHOT 
xmlns:sonar=antlib:org.sonar.ant /
/target
 /project

Modified: nutch/branches/nutchgora/doap.rdf
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/doap.rdf?rev=1199860r1=1199859r2=1199860view=diff
==
--- nutch/branches/nutchgora/doap.rdf (original)
+++ nutch/branches/nutchgora/doap.rdf Wed Nov  9 17:06:46 2011
@@ -27,11 +27,11 @@
 nameApache Nutch/name
 homepage rdf:resource=http://nutch.apache.org; /
 asfext:pmc rdf:resource=http://nutch.apache.org; /
-shortdescNutch 2.0 is a beeding edge (trunk) development of the Apache 
Nutch web search software./shortdesc
+shortdescNutch 2.0 is a branch development of the Apache Nutch web 
search software./shortdesc
 descriptionApache Nutch 2.0 maintains a refined architecture by 
delegating searching, parsing, and data storage to other software projects. In 
particular the storage layer has been delegated to the object relational 
mapping framework Gora (Apache Incubator) enabling the focus of Nutch 2.0 to be 
entirely on web crawling. This logic promotes Nutch 2.0 as a simpler, focussed 
web crawler enabling easy integration with other resources. /description
 bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; /
 mailing-list 
rdf:resource=http://www.mail-archive.com/dev%40nutch.apache.org/; /
-download-page rdf:resource=http://svn.apache.org/repos/asf/nutch/trunk/; 
/
+download-page 
rdf:resource=http://svn.apache.org/repos/asf/nutch/branches/nutchgora; /
 programming-languageJava/programming-language
 category rdf:resource=http://projects.apache.org/category/web-framework; 
/
 release
@@ -43,8 +43,8 @@
 /release
 repository
   SVNRepository
-location 
rdf:resource=https://svn.apache.org/repos/asf/nutch/trunk//
-browse rdf:resource=http://svn.apache.org/viewvc/nutch/trunk//
+location 
rdf:resource=https://svn.apache.org/repos/asf/nutch/branches/nutchgora/
+browse 
rdf:resource=http://svn.apache.org/viewvc/nutch/branches/nutchgora/
   /SVNRepository
 /repository
 maintainer




svn commit: r1199863 - /nutch/trunk/build.xml

2011-11-09 Thread lewismc
Author: lewismc
Date: Wed Nov  9 17:07:32 2011
New Revision: 1199863

URL: http://svn.apache.org/viewvc?rev=1199863&view=rev
Log:
commit to assign a unique key to build.xml Ant Sonar task.

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1199863r1=1199862r2=1199863view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Wed Nov  9 17:07:32 2011
@@ -722,7 +722,7 @@
 property name=sonar.binaries value=${build.dir}/plugins /
 property name=sonar.tests value=${test.src.dir} /
 
-sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch 
+sonar:sonar workDir=${base.dir} key=org.apache.nutch:trunk 
  version=1.4-SNAPSHOT xmlns:sonar=antlib:org.sonar.ant/
   /target





svn commit: r1197624 - /nutch/trunk/build.xml

2011-11-04 Thread lewismc
Author: lewismc
Date: Fri Nov  4 16:15:24 2011
New Revision: 1197624

URL: http://svn.apache.org/viewvc?rev=1197624&view=rev
Log:
commit to reconfigure ant sonar task, again.

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1197624r1=1197623r2=1197624view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Fri Nov  4 16:15:24 2011
@@ -712,26 +712,18 @@
   
   !-- Add the target --
   target name=sonar
-sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch 
version=1.4 xmlns:sonar=antlib:org.sonar.ant/
-
-!-- source directories (required) --
-sources
-  path location=${src.dir} /
-/sources
+
+!-- list of mandatory source directories (required) --
+property name=sonar.sources value=${src.dir}/
  
 !-- list of properties (optional) --
-property key=sonar.projectName value=Nutch Trunk 1.4 Sonar Analysis /
-property key=sonar.dynamicAnalysis value=false /
- 
-!-- test source directories (optional) --
-tests
-  path location=${test.src.dir} /
-/tests
+property name=sonar.projectName value=Nutch Trunk 1.4 Sonar Analysis 
/
+property name=sonar.binaries value=${build.dir}/classes /
+property name=sonar.binaries value=${build.dir}/plugins /
+property name=sonar.tests value=${test.src.dir} /
 
-!-- binaries directories, which contain for example the compiled Java 
bytecode --
- binaries
-   path location=${build.dir}/classes/
- /binaries
+sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch 
+ version=1.4-SNAPSHOT xmlns:sonar=antlib:org.sonar.ant/
   /target

 /project




svn commit: r1196537 - /nutch/trunk/build.xml

2011-11-02 Thread lewismc
Author: lewismc
Date: Wed Nov  2 10:58:20 2011
New Revision: 1196537

URL: http://svn.apache.org/viewvc?rev=1196537&view=rev
Log:
commit to add (hopefully) final configuration parameters to ant sonar target

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196537r1=1196536r2=1196537view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Wed Nov  2 10:58:20 2011
@@ -706,7 +706,7 @@
   
   !-- Define the Sonar task if this hasn't been done in a common script --
   taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml
-classpath path=/
+classpath path=${ant.library.dir}/
   /taskdef
   
   !-- Add the target --




svn commit: r1196800 - /nutch/trunk/build.xml

2011-11-02 Thread lewismc
Author: lewismc
Date: Wed Nov  2 21:08:44 2011
New Revision: 1196800

URL: http://svn.apache.org/viewvc?rev=1196800&view=rev
Log:
commit to add MySQL driver classpath to sonar analysis target

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196800r1=1196799r2=1196800view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Wed Nov  2 21:08:44 2011
@@ -707,6 +707,7 @@
   !-- Define the Sonar task if this hasn't been done in a common script --
   taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml
 classpath path=${ant.library.dir}/
+classpath path=${mysql.library.dir}/
   /taskdef
   
   !-- Add the target --




svn commit: r1196823 - /nutch/trunk/build.xml

2011-11-02 Thread lewismc
Author: lewismc
Date: Wed Nov  2 22:00:32 2011
New Revision: 1196823

URL: http://svn.apache.org/viewvc?rev=1196823&view=rev
Log:
commit to make trivial change to working directory configuration for ant sonar 
target...

Modified:
nutch/trunk/build.xml

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1196823r1=1196822r2=1196823view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Wed Nov  2 22:00:32 2011
@@ -712,7 +712,7 @@
   
   !-- Add the target --
   target name=sonar
-sonar:sonar workDir=${build.dir} key=org.apache.nutch:nutch 
version=1.4 xmlns:sonar=antlib:org.sonar.ant/
+sonar:sonar workDir=${base.dir} key=org.apache.nutch:nutch 
version=1.4 xmlns:sonar=antlib:org.sonar.ant/
 
 !-- source directories (required) --
 sources




svn commit: r1195403 - in /nutch/branches/nutchgora: CHANGES.txt build.xml conf/gora-cassandra-mapping.xml conf/gora-hbase-mapping.xml conf/gora-sql-mapping.xml conf/nutch-default.xml ivy/ivy.xml

2011-10-31 Thread lewismc
Author: lewismc
Date: Mon Oct 31 10:49:17 2011
New Revision: 1195403

URL: http://svn.apache.org/viewvc?rev=1195403&view=rev
Log:
commit to address NUTCH-902 and update to changes.txt

Added:
nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/conf/gora-sql-mapping.xml
nutch/branches/nutchgora/conf/nutch-default.xml
nutch/branches/nutchgora/ivy/ivy.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1195403r1=1195402r2=1195403view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Mon Oct 31 10:49:17 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-902 Add all necessary files and configuration so that nutch can be 
used with different backends out-of-the-box (lewismc)
+
 * NUTCH-1081 & 1135 ant tests fail & Fix TestGoraStorage for Nutchgora (Ferdy 
via lewismc)
 
 * NUTCH-1156 building errors with gora-hbase as a backend; update ivy.xml to 
use correct dependancies (Ferdy via lewismc)

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1195403r1=1195402r2=1195403view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Mon Oct 31 10:49:17 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=Nutch default=runtime xmlns:ivy=antlib:org.apache.ivy.ant 
xmlns:artifact=antlib:org.apache.maven.artifact.ant
+project name=Nutchgora default=runtime 
xmlns:ivy=antlib:org.apache.ivy.ant 
xmlns:artifact=antlib:org.apache.maven.artifact.ant
 
   !-- Load all the default properties, and any the user wants--
   !-- to contribute (without having to type -D or edit this file --

Added: nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml?rev=1195403view=auto
==
--- nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-cassandra-mapping.xml Mon Oct 31 
10:49:17 2011
@@ -0,0 +1,43 @@
+?xml version=1.0 encoding=UTF-8?
+
+gora-orm
+
+keyspace name=webpage cluster=Test Cluster host=localhost
+family name=p/
+family name=f/
+family name=sc type=super/
+/keyspace
+class keyClass=java.lang.String name=org.apache.nutch.storage.WebPage
+
+!-- fetch fields --
+field name=baseUrl family=f qualifier=bas/
+field name=status family=f qualifier=st/
+field name=prevFetchTime family=f qualifier=pts/
+field name=fetchTime family=f qualifier=ts/
+field name=fetchInterval family=f qualifier=fi/
+field name=retriesSinceFetch family=f qualifier=rsf/
+field name=reprUrl family=f qualifier=rpr/
+field name=content family=f qualifier=cnt/
+field name=contentType family=f qualifier=typ/
+field name=modifiedTime family=f qualifier=mod/
+
+!-- parse fields --
+field name=title family=p qualifier=t/
+field name=text family=p qualifier=c/
+field name=signature family=p qualifier=sig/
+field name=prevSignature family=p qualifier=psig/
+
+!-- score fields --
+field name=score family=f qualifier=s/
+
+!-- super columns --
+field name=markers family=sc qualifier=mk/
+field name=inlinks family=sc qualifier=il/
+field name=outlinks family=sc qualifier=ol/
+field name=metadata family=sc qualifier=mtdt/
+field name=headers family=sc qualifier=h/
+field name=parseStatus family=sc qualifier=pas/
+field name=protocolStatus family=sc qualifier=prs/
+/class
+
+/gora-orm
\ No newline at end of file

Added: nutch/branches/nutchgora/conf/gora-hbase-mapping.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/conf/gora-hbase-mapping.xml?rev=1195403view=auto
==
--- nutch/branches/nutchgora/conf/gora-hbase-mapping.xml (added)
+++ nutch/branches/nutchgora/conf/gora-hbase-mapping.xml Mon Oct 31 10:49:17 
2011
@@ -0,0 +1,46 @@
+?xml version=1.0 encoding=UTF-8?
+
+gora-orm
+
+table name=webpage
+family name=p/ !-- This can also have params like compression, 
bloom filters --
+family name=f/
+family name=s/
+family name=il/
+family name=ol

svn commit: r1186985 - /nutch/branches/nutchgora/build.xml

2011-10-20 Thread lewismc
Author: lewismc
Date: Thu Oct 20 18:34:25 2011
New Revision: 1186985

URL: http://svn.apache.org/viewvc?rev=1186985&view=rev
Log:
 commit to fix upgrade to language-identifier targets which I missed when 
addressing a previous commit

Modified:
nutch/branches/nutchgora/build.xml

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1186985r1=1186984r2=1186985view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Thu Oct 20 18:34:25 2011
@@ -167,7 +167,7 @@
   packageset dir=${plugins.dir}/urlfilter-regex/src/java/
   packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
   packageset dir=${plugins.dir}/creativecommons/src/java/
-  packageset dir=${plugins.dir}/languageidentifier/src/java/
+  packageset dir=${plugins.dir}/language-identifier/src/java/
   
   link href=${javadoc.link.java}/
   link href=${javadoc.link.lucene}/
@@ -530,7 +530,7 @@
   packageset dir=${plugins.dir}/urlfilter-regex/src/java/
   packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
   packageset dir=${plugins.dir}/creativecommons/src/java/
-  packageset dir=${plugins.dir}/languageidentifier/src/java/
+  packageset dir=${plugins.dir}/language-identifier/src/java/
   
   link href=${javadoc.link.java}/
   link href=${javadoc.link.lucene}/




svn commit: r1185868 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/images/ publish/skin/images/

2011-10-18 Thread lewismc
Author: lewismc
Date: Tue Oct 18 21:10:55 2011
New Revision: 1185868

URL: http://svn.apache.org/viewvc?rev=1185868&view=rev
Log:
commit to make trivial update to credits.html

Modified:
nutch/site/forrest/src/documentation/content/xdocs/about.xml
nutch/site/forrest/src/documentation/content/xdocs/credits.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/broken-links.xml
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/faq.html
nutch/site/publish/faq.pdf
nutch/site/publish/images/built-with-forrest-button.png
nutch/site/publish/index.html
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/built-with-forrest-button.png
nutch/site/publish/tutorial.html
nutch/site/publish/tutorial.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf
nutch/site/publish/wiki.html
nutch/site/publish/wiki.pdf

Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1185868r1=1185867r2=1185868view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Tue Oct 18 
21:10:55 2011
@@ -40,7 +40,7 @@
plugin infrastructure./p
 
   pFor more information about Apache Nutch, please see the a
-  href=wiki.htmlNutch wiki./a/p
+  href=http://wiki.apache.org/nutch/;Nutch wiki./a/p
 
   
 

Modified: nutch/site/forrest/src/documentation/content/xdocs/credits.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/credits.xml?rev=1185868r1=1185867r2=1185868view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/credits.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/credits.xml Tue Oct 18 
21:10:55 2011
@@ -37,7 +37,7 @@
   lia href=http://openindex.io/;Markus Jelsma/a/li
   lia href=http://people.apache.org/~siren;Sami Siren/a/li
   lia href=http://techvineyard.blogspot.com/;Alexis de Tréglodé/a/li
-  liLewis John McGibbney/li
+  lia 
href=http://www.linkedin.com/pub/lewis-john-mcgibbney/26/a92/a39;Lewis John 
McGibbney/a/li
 /ul
 /section
 

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1185868r1=1185867r2=1185868view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Tue Oct 18 21:10:55 2011
@@ -3,7 +3,7 @@
 head
 META http-equiv=Content-Type content=text/html; charset=UTF-8
 meta content=Apache Forrest name=Generator
-meta name=Forrest-version content=0.9
+meta name=Forrest-version content=0.10-dev
 meta name=Forrest-skin-name content=nutch
 titleAbout Apache Nutch/title
 link type=text/css href=skin/basic.css rel=stylesheet
@@ -271,7 +271,7 @@ document.write(Last Published:  + docu
 pThe system can be enhanced (eg other document formats can be 
   parsed) using a highly flexible, easily extensible and thoroughly 
maintained
plugin infrastructure./p
-pFor more information about Apache Nutch, please see the a 
href=wiki.htmlNutch wiki./a
+pFor more information about Apache Nutch, please see the a 
href=http://wiki.apache.org/nutch/;Nutch wiki./a
 /p
 /div
 

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1185868r1=1185867r2=1185868view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1185868r1=1185867r2=1185868view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Tue Oct 18 21:10:55 2011
@@ -3,7 +3,7 @@
 head
 META http-equiv=Content-Type content=text/html; charset=UTF-8
 meta content=Apache Forrest name=Generator
-meta name=Forrest-version content=0.9
+meta name=Forrest-version content=0.10-dev
 meta name=Forrest-skin-name content=nutch
 titleApache Nutch robot/title
 link type=text/css href=skin/basic.css rel=stylesheet

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1185868&r1=1185867&r2=1185868&view=diff

svn commit: r1182504 - in /nutch/branches/nutchgora: CHANGES.txt src/java/org/apache/nutch/parse/ParserFactory.java src/plugin/parse-html/plugin.xml

2011-10-12 Thread lewismc
Author: lewismc
Date: Wed Oct 12 18:18:48 2011
New Revision: 1182504

URL: http://svn.apache.org/viewvc?rev=1182504&view=rev
Log:
commit to address NUTCH-1097 and update to changes.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java
nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1182504&r1=1182503&r2=1182504&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Wed Oct 12 18:18:48 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of 
parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc)
+
 * Change plugin source directory languageidentifier to language-identifier 
(lewismc)
 
 * NUTCH-1132, 1133 & 1134 Fix TestGenerator, TestInjector & TestFetcher 
respectively (lewismc)

Modified: 
nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1182504&r1=1182503&r2=1182504&view=diff
==
--- nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java 
(original)
+++ nutch/branches/nutchgora/src/java/org/apache/nutch/parse/ParserFactory.java 
Wed Oct 12 18:18:48 2011
@@ -356,14 +356,13 @@ public final class ParserFactory {
   // NotMappedParserException
 
   for (int i=0; iextensions.length; i++) {
-if (extensions[i].getAttribute(contentType) != null
- extensions[i].getAttribute(contentType).equals(
-contentType)) {
-  extList.add(extensions[i]);
-}
-else if (*.equals(extensions[i].getAttribute(contentType))){
+   if (*.equals(extensions[i].getAttribute(contentType))){
   extList.add(0, extensions[i]);
 }
+   else if (extensions[i].getAttribute(contentType) != null
+ 
contentType.matches(escapeContentType(extensions[i].getAttribute(contentType
 {
+  extList.add(extensions[i]);
+}
   }
 
   if (extList.size()  0) {
@@ -391,10 +390,19 @@ public final class ParserFactory {
 return (extList.size()  0) ? extList : null;
   }
 
-  private boolean match(Extension extension, String id, String type) {
-return ((id.equals(extension.getId())) 
-(type.equals(extension.getAttribute(contentType)) || 
extension.getAttribute(contentType).equals(*) ||
- type.equals(DEFAULT_PLUGIN)));
+  private String escapeContentType(String contentType) {
+   // Escapes contentType in order to use as a regex 
+   // (and keep backwards compatibility).
+   // This enables to accept multiple types for a single parser. 
+   return contentType.replace(+, \\+).replace(., \\.);
+   }
+
+
+   private boolean match(Extension extension, String id, String type) {
+return (id.equals(extension.getId())) 
+(extension.getAttribute(contentType).equals(*) ||
+ 
type.matches(escapeContentType(extension.getAttribute(contentType))) ||
+ type.equals(DEFAULT_PLUGIN));
   }
 
   /** Get an extension from its id and supported content-type. */

Modified: nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml?rev=1182504&r1=1182503&r2=1182504&view=diff
==
--- nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml (original)
+++ nutch/branches/nutchgora/src/plugin/parse-html/plugin.xml Wed Oct 12 
18:18:48 2011
@@ -39,7 +39,7 @@
 
   implementation id=org.apache.nutch.parse.html.HtmlParser
   class=org.apache.nutch.parse.html.HtmlParser
-parameter name=contentType value=text/html/
+parameter name=contentType value=text/html|application/xhtml+xml/
 parameter name=pathSuffix value=/
   /implementation
 




svn commit: r1182506 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/parse/ParserFactory.java src/plugin/parse-html/plugin.xml

2011-10-12 Thread lewismc
Author: lewismc
Date: Wed Oct 12 18:22:20 2011
New Revision: 1182506

URL: http://svn.apache.org/viewvc?rev=1182506&view=rev
Log:
commit to address NUTCH-1097 and update to changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
nutch/trunk/src/plugin/parse-html/plugin.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1182506&r1=1182505&r2=1182506&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Oct 12 18:22:20 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of 
parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc)
+
 * NUTCH-797 Fix parse-tika and parse-html to use relative URL resolution per 
RFC-3986
   (Robert Hohman, ab)
 

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1182506&r1=1182505&r2=1182506&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Wed Oct 12 
18:22:20 2011
@@ -343,14 +343,13 @@ public final class ParserFactory {
   // NotMappedParserException
   
   for (int i=0; iextensions.length; i++) {
-if (extensions[i].getAttribute(contentType) != null
- extensions[i].getAttribute(contentType).equals(
-contentType)) {
-  extList.add(extensions[i]);
-}
-else if (*.equals(extensions[i].getAttribute(contentType))){
+   if (*.equals(extensions[i].getAttribute(contentType))){
   extList.add(0, extensions[i]);
 }
+else if (extensions[i].getAttribute(contentType) != null
+ 
contentType.matches(escapeContentType(extensions[i].getAttribute(contentType
 {
+  extList.add(extensions[i]);
+}
   }
   
   if (extList.size()  0) {
@@ -377,10 +376,18 @@ public final class ParserFactory {
 
 return (extList.size()  0) ? extList : null;
   }
+  
+  private String escapeContentType(String contentType) {
+   // Escapes contentType in order to use as a regex 
+   // (and keep backwards compatibility).
+   // This enables to accept multiple types for a single parser. 
+   return contentType.replace(+, \\+).replace(., \\.);
+   }
 
   private boolean match(Extension extension, String id, String type) {
 return ((id.equals(extension.getId())) 
-(type.equals(extension.getAttribute(contentType)) || 
extension.getAttribute(contentType).equals(*) ||
+(extension.getAttribute(contentType).equals(*) || 
+ 
type.matches(escapeContentType(extension.getAttribute(contentType))) ||
  type.equals(DEFAULT_PLUGIN)));
   }
   

Modified: nutch/trunk/src/plugin/parse-html/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/plugin.xml?rev=1182506&r1=1182505&r2=1182506&view=diff
==
--- nutch/trunk/src/plugin/parse-html/plugin.xml (original)
+++ nutch/trunk/src/plugin/parse-html/plugin.xml Wed Oct 12 18:22:20 2011
@@ -39,7 +39,7 @@
 
   implementation id=org.apache.nutch.parse.html.HtmlParser
   class=org.apache.nutch.parse.html.HtmlParser
-parameter name=contentType value=text/html/
+parameter name=contentType value=text/html|application/xhtml+xml/
 parameter name=pathSuffix value=/
   /implementation
 




svn commit: r1182511 - in /nutch/branches/nutchgora: CHANGES.txt build.xml

2011-10-12 Thread lewismc
Author: lewismc
Date: Wed Oct 12 18:27:42 2011
New Revision: 1182511

URL: http://svn.apache.org/viewvc?rev=1182511&view=rev
Log:
commit to address NUTCH-1109 and update to changes.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1182511&r1=1182510&r2=1182511&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Wed Oct 12 18:27:42 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1109 Add Sonar targets to Ant build.xml (lewismc)
+
 * NUTCH-1097 application/xhtml+xml should be enabled in plugin.xml of 
parse-html; allow multiple mimetypes for plugin.xml (Ferdy via lewismc)
 
 * Change plugin source directory languageidentifier to language-identifier 
(lewismc)

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1182511&r1=1182510&r2=1182511&view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Wed Oct 12 18:27:42 2011
@@ -705,5 +705,37 @@
   /fileset
 /rat:report
   /target
-   
+
+  !-- == --
+  !-- SONAR targets  --
+  !-- == --
+  
+  !-- Define the Sonar task if this hasn't been done in a common script --
+  taskdef uri=antlib:org.sonar.ant resource=org/sonar/ant/antlib.xml
+classpath path=/
+  /taskdef
+  
+  !-- Add the target --
+  target name=sonar
+sonar:sonar workDir=${build.dir} key=org.apache.nutch:nutch 
version=2.0 xmlns:sonar=antlib:org.sonar.ant/
+
+!-- source directories (required) --
+sources
+  path location=${src.dir} /
+/sources
+ 
+!-- list of properties (optional) --
+property key=sonar.projectName value=Nutchgora 2.0 Sonar Analysis /
+property key=sonar.dynamicAnalysis value=false /
+ 
+!-- test source directories (optional) --
+tests
+  path location=${test.src.dir} /
+/tests
+
+!-- binaries directories, which contain for example the compiled Java 
bytecode --
+ binaries
+   path location=${build.dir}/classes/
+ /binaries
+  /target
 /project




svn commit: r1179603 - in /nutch/trunk: CHANGES.txt build.xml

2011-10-06 Thread lewismc
Author: lewismc
Date: Thu Oct  6 12:49:31 2011
New Revision: 1179603

URL: http://svn.apache.org/viewvc?rev=1179603&view=rev
Log:
commit to address NUTCH-1136 and update to changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1179603&r1=1179602&r2=1179603&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct  6 12:49:31 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1136 Ant pmd target is broken
+
 * NUTCH-1058 Upgrade Solr schema to version 1.4 (markus)
 
 * NUTCH-1137 LinkDB invertlinks other options ignored when using -dir option 
(Sebastian Nagel, markus)

Modified: nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1179603&r1=1179602&r2=1179603&view=diff
==
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Thu Oct  6 12:49:31 2011
@@ -323,40 +323,8 @@
   /target
 
   !-- == --
-  !-- Run code checks (PMD)  -- 
+  !-- Run Nutch proxy-- 
   !-- == --
-  target name=pmd depends=compile
-   property name=pmd.report location=${build.dir}/pmd-report.html /
-   taskdef name=pmd classname=net.sourceforge.pmd.ant.PMDTask
- classpath
- fileset dir=${lib.dir}
-include name=pmd-ext/*.jar /
-include name=xerces*.jar /
-  /fileset
- /classpath
-   /taskdef
-   pmd shortFilenames=true failonerror=true 
failOnRuleViolation=false
-encoding=${build.encoding} 
failuresPropertyName=pmd.failures
- rulesetunusedcode/ruleset
-  !--rulesetbasic/ruleset--
-  !--rulesetoptimizations/ruleset--
-  formatter type=html toFile=${pmd.report} /
- !-- formatter type=xml toFile=${tempbuild}/$report_pmd.xml/ --
-   fileset dir=${basedir}/src
-   include name=java/**/*.java/
-   include name=plugin/**/*.java/
-  /fileset
-/pmd
-   condition property=pmd.stop value=true
-  and
-isset property=pmd.failures /
-  not
-equals arg1=0 arg2=${pmd.failures} trim=true /
-  /not
-  /and
-   /condition
-   fail if=pmd.stopFAILURE: PMD shows ${pmd.failures} rule violations. 
See ${pmd.report} for details./fail
-  /target
 
   target name=proxy depends=job, compile-core-test
 java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true
@@ -370,6 +338,10 @@
 /java
   /target
 
+  !-- == --
+  !-- Run Nutch benchmarking analysis-- 
+  !-- == --
+
   target name=benchmark
 java classname=org.apache.nutch.tools.Benchmark fork=true
   classpath refid=test.classpath/




svn commit: r1179605 - in /nutch/branches/nutchgora: CHANGES.txt build.xml

2011-10-06 Thread lewismc
Author: lewismc
Date: Thu Oct  6 12:55:08 2011
New Revision: 1179605

URL: http://svn.apache.org/viewvc?rev=1179605&view=rev
Log:
commit to address NUTCH-1136 and update to changes.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1179605&r1=1179604&r2=1179605&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Oct  6 12:55:08 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release nutchgora - Current Development
 
+* NUTCH-1136 Ant pmd target is broken
+
 * NUTCH-1058 Upgrade Solr schema version to 1.4 (markus)
 
 * NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc)

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1179605&r1=1179604&r2=1179605&view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Thu Oct  6 12:55:08 2011
@@ -328,50 +328,13 @@
   /target
 
   !-- == --
-  !-- Run code checks (PMD)  -- 
+  !-- Run Nutch proxy-- 
   !-- == --
-  target name=pmd depends=compile
-   property name=pmd.report location=${build.dir}/pmd-report.html /
-   taskdef name=pmd classname=net.sourceforge.pmd.ant.PMDTask
- classpath
- fileset dir=${lib.dir}
-include name=pmd-ext/*.jar /
-include name=xerces*.jar /
-  /fileset
- /classpath
-   /taskdef
-   pmd shortFilenames=true failonerror=true 
failOnRuleViolation=false
-encoding=${build.encoding} 
failuresPropertyName=pmd.failures
- rulesetunusedcode/ruleset
-  !--rulesetbasic/ruleset--
-  !--rulesetoptimizations/ruleset--
-  formatter type=html toFile=${pmd.report} /
- !-- formatter type=xml toFile=${tempbuild}/$report_pmd.xml/ --
-   fileset dir=${basedir}/src
-   include name=java/**/*.java/
-   include name=plugin/**/*.java/
-  /fileset
-/pmd
-   condition property=pmd.stop value=true
-  and
-isset property=pmd.failures /
-  not
-equals arg1=0 arg2=${pmd.failures} trim=true /
-  /not
-  /and
-   /condition
-   fail if=pmd.stopFAILURE: PMD shows ${pmd.failures} rule violations. 
See ${pmd.report} for details./fail
-  /target
 
-  target name=proxy depends=job
+  target name=proxy depends=job, compile-core-test
 java classname=org.apache.nutch.tools.proxy.TestbedProxy fork=true
   classpath refid=test.classpath/
   arg value=-fake/
-  arg value=-hostMode/
-  arg value=u/
-  arg value=-pageMode/
-  arg value=u/
-  arg value=-debug/
 !--
   arg value=-delay/
   arg value=-200/
@@ -380,15 +343,9 @@
 /java
   /target
 
-  target name=rundb
-java classname=org.hsqldb.server.Server fork=true
-  classpath refid=test.classpath/
-  arg value=--database.0/
-  arg value=file:data/benchmark/
-  arg value=--dbname.0/
-  arg value=nutchtest/
-/java
-  /target
+  !-- == --
+  !-- Run Nutch benchmarking analysis-- 
+  !-- == --
 
   target name=benchmark
 java classname=org.apache.nutch.tools.Benchmark fork=true




svn commit: r1177269 - in /nutch/branches/nutchgora: CHANGES.txt build.xml src/bin/nutch

2011-09-29 Thread lewismc
Author: lewismc
Date: Thu Sep 29 11:38:30 2011
New Revision: 1177269

URL: http://svn.apache.org/viewvc?rev=1177269&view=rev
Log:
commit to address NUTCH-672 to update to changes.txt

Modified:
nutch/branches/nutchgora/CHANGES.txt
nutch/branches/nutchgora/build.xml
nutch/branches/nutchgora/src/bin/nutch

Modified: nutch/branches/nutchgora/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/CHANGES.txt?rev=1177269&r1=1177268&r2=1177269&view=diff
==
--- nutch/branches/nutchgora/CHANGES.txt (original)
+++ nutch/branches/nutchgora/CHANGES.txt Thu Sep 29 11:38:30 2011
@@ -1,12 +1,14 @@
 Nutch Change Log
 
-Release 2.0 - Current Development
+Release nutchgora - Current Development
+
+* NUTCH-672 allow unit tests to be run from bin/nutch (Todd Lipton via lewismc)
 
 * NUTCH-937 Put plugins in classes/plugins in job file (Claudio Martella, 
Ferdy Galema, jnioche)
 
 * NUTCH-1131 Rely on published artefacts for GORA (jnioche)
 
-* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml
+* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml 
(lewismc)
 
 * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy 
Galema via jnioche)
 

Modified: nutch/branches/nutchgora/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/build.xml?rev=1177269&r1=1177268&r2=1177269&view=diff
==
--- nutch/branches/nutchgora/build.xml (original)
+++ nutch/branches/nutchgora/build.xml Thu Sep 29 11:38:30 2011
@@ -304,6 +304,9 @@
 copy todir=${runtime.local}/plugins
   fileset dir=${build.dir}/plugins/
 /copy
+copy todir=${runtime.local}/test
+  fileset dir=${build.dir}/test/
+/copy
   /target
 
   !-- == --

Modified: nutch/branches/nutchgora/src/bin/nutch
URL: 
http://svn.apache.org/viewvc/nutch/branches/nutchgora/src/bin/nutch?rev=1177269&r1=1177268&r2=1177269&view=diff
==
--- nutch/branches/nutchgora/src/bin/nutch (original)
+++ nutch/branches/nutchgora/src/bin/nutch Thu Sep 29 11:38:30 2011
@@ -48,17 +48,18 @@ if [ $# = 0 ]; then
   echo Usage: nutch [-core] COMMAND
   echo where COMMAND is one of:
 # echo  crawl one-step crawler for intranets
-  echo  inject inject new urls into the database
-  echo  generate generate new segments to fetch from crawl db
-  echo  fetch fetch URLs marked during generate
-  echo  parse parse URLs marked during fetch
-  echo  updatedb update web table after parsing
-  echo  readdb read/dump records from page database
-  echo  solrindex run the solr indexer on parsed segments and linkdb
-  echo  solrdedup remove duplicates from solr
-  echo  plugin load a plugin and run one of its classes main()
+  echo  injectinject new urls into the database
+  echo  generate  generate new segments to fetch from crawl db
+  echo  fetch fetch URLs marked during generate
+  echo  parse parse URLs marked during fetch
+  echo  updatedb  update web table after parsing
+  echo  readdbread/dump records from page database
+  echo  solrindex run the solr indexer on parsed segments and linkdb
+  echo  solrdedup remove duplicates from solr
+  echo  pluginload a plugin and run one of its classes main()
+  echo  junit runs the given JUnit test
   echo  or
-  echo  CLASSNAME run the class named CLASSNAME
+  echo  CLASSNAME run the class named CLASSNAME
   echo Most commands print help when invoked w/o parameters.
   echo 
   echo Expert: -core option is for developers only. It avoids building the 
job jar, 
@@ -199,6 +200,9 @@ elif [ $COMMAND = solrdedup ] ; then
 CLASS=org.apache.nutch.indexer.solr.SolrDeleteDuplicates
 elif [ $COMMAND = plugin ] ; then
 CLASS=org.apache.nutch.plugin.PluginRepository
+elif [ $COMMAND = junit ] ; then
+  CLASSPATH=$CLASSPATH:test/classes/
+  CLASS=junit.textui.TestRunner
 else
 CLASS=$COMMAND
 fi




svn commit: r1177290 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/util/LogUtil.java

2011-09-29 Thread lewismc
Author: lewismc
Date: Thu Sep 29 12:47:39 2011
New Revision: 1177290

URL: http://svn.apache.org/viewvc?rev=1177290&view=rev
Log:
commit to address NUTCH-1078 and update to changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1177290&r1=1177289&r2=1177290&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Sep 29 12:47:39 2011
@@ -6,7 +6,7 @@ Release 1.4 - Current development
 
 * NUTCH-937 Put plugins in classes/plugins in job file (Claudio Martella, 
Ferdy Galema, jnioche)
 
-* NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier
+* NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier (lewismc)
 
 * NUTCH-1074 topN is ignored with maxNumSegments and generate.max.count 
(Robert Thomson via markus)
 

Modified: nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java?rev=1177290&r1=1177289&r2=1177290&view=diff
==
--- nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java Thu Sep 29 12:47:39 
2011
@@ -22,7 +22,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.lang.reflect.Method;
 
-// Commons Logging imports
+// slf4j Logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -45,12 +45,12 @@ public class LogUtil {
 
   static {
 try {
-  TRACE = Logger.class.getMethod(trace, new Class[] { Object.class });
-  DEBUG = Logger.class.getMethod(debug, new Class[] { Object.class });
-  INFO  = Logger.class.getMethod(info,  new Class[] { Object.class });
-  WARN  = Logger.class.getMethod(warn,  new Class[] { Object.class });
-  ERROR = Logger.class.getMethod(error, new Class[] { Object.class });
-  FATAL = Logger.class.getMethod(error, new Class[] { Object.class });
+  TRACE = Logger.class.getMethod(trace, new Class[] { String.class });
+  DEBUG = Logger.class.getMethod(debug, new Class[] { String.class });
+  INFO  = Logger.class.getMethod(info,  new Class[] { String.class });
+  WARN  = Logger.class.getMethod(warn,  new Class[] { String.class });
+  ERROR = Logger.class.getMethod(error, new Class[] { String.class });
+  FATAL = Logger.class.getMethod(error, new Class[] { String.class });
 } catch(Exception e) {
   if (LOG.isErrorEnabled()) {
 LOG.error(Cannot init log methods, e);
@@ -100,7 +100,7 @@ public class LogUtil {
   if (!hasNewline())
 return;
   try {
-method.invoke(logger, new Object[] { toString().trim() });
+method.invoke(logger, new String[] { toString().trim() });
   } catch (Exception e) {
 if (LOG.isErrorEnabled()) {
   LOG.error(Cannot log with method [ + method + ], e);




svn commit: r1172043 - in /nutch/site: forrest/src/documentation/ forrest/src/documentation/content/xdocs/ forrest/src/documentation/resources/images/ publish/ publish/images/ publish/skin/images/

2011-09-17 Thread lewismc
Author: lewismc
Date: Sat Sep 17 18:41:15 2011
New Revision: 1172043

URL: http://svn.apache.org/viewvc?rev=1172043&view=rev
Log:
commit to update site to address NUTCH-1092, as well as other trivial site 
update, of which I suspect there will be some more once voting has finished.

Added:
nutch/site/forrest/src/documentation/content/xdocs/faq.xml
nutch/site/forrest/src/documentation/content/xdocs/tutorial.xml
nutch/site/forrest/src/documentation/content/xdocs/wiki.xml
nutch/site/forrest/src/documentation/resources/images/feather-small.gif   
(with props)
Removed:
nutch/site/forrest/src/documentation/content/xdocs/i18n.xml
Modified:
nutch/site/forrest/src/documentation/content/xdocs/about.xml
nutch/site/forrest/src/documentation/content/xdocs/index.xml
nutch/site/forrest/src/documentation/content/xdocs/site.xml
nutch/site/forrest/src/documentation/content/xdocs/tabs.xml
nutch/site/forrest/src/documentation/skinconf.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/broken-links.xml
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/images/built-with-forrest-button.png
nutch/site/publish/index.html
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/built-with-forrest-button.png
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1172043&r1=1172042&r2=1172043&view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Sat Sep 17 
18:41:15 2011
@@ -40,7 +40,7 @@
plugin infrastructure./p
 
   pFor more information about Apache Nutch, please see the a
-  href=ext:wikiNutch wiki./a/p
+  href=wiki.htmlNutch wiki./a/p
 
   
 

Added: nutch/site/forrest/src/documentation/content/xdocs/faq.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/faq.xml?rev=1172043&view=auto
==
--- nutch/site/forrest/src/documentation/content/xdocs/faq.xml (added)
+++ nutch/site/forrest/src/documentation/content/xdocs/faq.xml Sat Sep 17 
18:41:15 2011
@@ -0,0 +1,35 @@
+?xml version=1.0?
+!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the License); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an AS IS BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+--
+!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN 
+  http://forrest.apache.org/dtd/document-v20.dtd;
+
+document 
+
+header
+  titleApache Nutch FAQ's/title 
+/header 
+
+body 
+
+section
+  titleFrequently Asked Questions/title pWelcome to the Apache Nutch 
FAQ's which can be found a 
href=http://wiki.apache.org/nutch/FAQ;here/a./p
+/section
+
+/body
+
+/document

Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1172043&r1=1172042&r2=1172043&view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Sat Sep 17 
18:41:15 2011
@@ -1,6 +1,4 @@
 ?xml version=1.0?
-!DOCTYPE document PUBLIC -//APACHE//DTD Documentation V2.0//EN 
-  http://forrest.apache.org/dtd/document-v20.dtd;
 !--
  Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
@@ -17,12 +15,13 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 --
+!DOCTYPE document

svn commit: r1169475 - in /nutch/trunk: CHANGES.txt conf/nutch-default.xml

2011-09-11 Thread lewismc
Author: lewismc
Date: Sun Sep 11 16:40:22 2011
New Revision: 1169475

URL: http://svn.apache.org/viewvc?rev=1169475&view=rev
Log:
commit to address NUTCH-1099 and update to changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1169475&r1=1169474&r2=1169475&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sun Sep 11 16:40:22 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-1099 Adds HBase and Cassandra storage properties to nutch-default.xml
+
 * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy 
Galema via jnioche)
 
 * NUTCH-1089 Short compressed pages caused exception in protocol-httpclient 
(Simone Frenzel via jnioche)

Modified: nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1169475&r1=1169474&r2=1169475&view=diff
==
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Sun Sep 11 16:40:22 2011
@@ -1068,6 +1068,22 @@
   descriptionDefault class for storing data/description
 /property
 
+!--
+property
+  namestorage.data.store.class/name
+  valueorg.apache.gora.cassandra.store.CassandraStore/value
+  descriptionClass for storing data in Apache Cassandra/description
+/property
+--
+
+!--
+property
+  namestorage.data.store.class/name
+  valueorg.apache.gora.hbase.store.HBaseStore/value
+  descriptionClass for storing data in Apache HBase/description
+/property
+--
+
 property
   namestorage.schema/name
   valuewebpage/value




svn commit: r1169502 - in /nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield: StaticFieldIndexer.java package.html

2011-09-11 Thread lewismc
Author: lewismc
Date: Sun Sep 11 19:13:38 2011
New Revision: 1169502

URL: http://svn.apache.org/viewvc?rev=1169502&view=rev
Log:
commit to address final patch for NUTCH-940 1.4 branch.

Added:

nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
Modified:

nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java

Modified: 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java?rev=1169502&r1=1169501&r2=1169502&view=diff
==
--- 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
 (original)
+++ 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
 Sun Sep 11 19:13:38 2011
@@ -29,6 +29,11 @@ import org.apache.nutch.parse.Parse;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.conf.Configuration;
 
+/** A simple plugin called at indexing that adds fields with static data. 
+ *  You can specify a list of fieldname:fieldcontent per nutch job.
+ *  It can be useful when collections can't be created by urlpatterns, 
+ *  like in subcollection, but on a job-basis. */
+
 public class StaticFieldIndexer implements IndexingFilter {
private Configuration conf;
private HashMapString, String[] fields;

Added: 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html?rev=1169502&view=auto
==
--- 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
 (added)
+++ 
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/package.html
 Sun Sep 11 19:13:38 2011
@@ -0,0 +1,5 @@
+<html>
+<body>
+<p>A simple plugin called at indexing that adds fields with static data. You 
can specify a list of fieldname:fieldcontent per nutch job. It can be useful 
when collections can't be created by urlpatterns, like in subcollection, but on 
a job-basis.</p><p></p>
+</body>
+</html>




svn commit: r1167651 - in /nutch/branches/branch-1.4: ./ conf/ src/plugin/ src/plugin/index-static/ src/plugin/index-static/src/ src/plugin/index-static/src/java/ src/plugin/index-static/src/java/org/

2011-09-10 Thread lewismc
Author: lewismc
Date: Sat Sep 10 23:46:00 2011
New Revision: 1167651

URL: http://svn.apache.org/viewvc?rev=1167651&view=rev
Log:
commit to address NUTCH-940 and update to changes.txt

Added:
nutch/branches/branch-1.4/src/plugin/index-static/
nutch/branches/branch-1.4/src/plugin/index-static/build.xml
nutch/branches/branch-1.4/src/plugin/index-static/ivy.xml
nutch/branches/branch-1.4/src/plugin/index-static/plugin.xml
nutch/branches/branch-1.4/src/plugin/index-static/src/
nutch/branches/branch-1.4/src/plugin/index-static/src/java/
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/
nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/

nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/

nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/

nutch/branches/branch-1.4/src/plugin/index-static/src/java/org/apache/nutch/indexer/staticfield/StaticFieldIndexer.java
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/conf/nutch-default.xml
nutch/branches/branch-1.4/src/plugin/build.xml

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1167651r1=1167650r2=1167651view=diff
==
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Sat Sep 10 23:46:00 2011
@@ -2,6 +2,12 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-940 static field plugin (Claudio Martella via lewismc)
+
+* NUTCH-914 Implement Apache Project Branding Requirements (lewismc)
+
+* NUTCH-1095 remove i18n from Nutch site to archive and legacy secton of wiki 
(lewismc)
+
 * NUTCH-1101 Option to purge db_gone records with updatedb (markus)
 
 * NUTCH-1096 Empty (not null) ContentLength results in failure of fetch (Ferdy 
Galema via jnioche)

Modified: nutch/branches/branch-1.4/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/nutch-default.xml?rev=1167651r1=1167650r2=1167651view=diff
==
--- nutch/branches/branch-1.4/conf/nutch-default.xml (original)
+++ nutch/branches/branch-1.4/conf/nutch-default.xml Sat Sep 10 23:46:00 2011
@@ -1050,6 +1050,19 @@
   /description
 /property
 
+<!-- index-static plugin properties -->
+
+<property>
+  <name>index-static</name>
+  <value></value>
+  <description>
+  A simple plugin called at indexing that adds fields with static data. 
+  You can specify a list of fieldname:fieldcontent per nutch job.
+  It can be useful when collections can't be created by urlpatterns, 
+  like in subcollection, but on a job-basis.
+  </description>
+</property>
+
 !-- Temporary Hadoop 0.17.x workaround. --
 
 property

Modified: nutch/branches/branch-1.4/src/plugin/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/build.xml?rev=1167651r1=1167650r2=1167651view=diff
==
--- nutch/branches/branch-1.4/src/plugin/build.xml (original)
+++ nutch/branches/branch-1.4/src/plugin/build.xml Sat Sep 10 23:46:00 2011
@@ -31,6 +31,7 @@
  ant dir=index-basic target=deploy/
  ant dir=index-anchor target=deploy/
  ant dir=index-more target=deploy/
+ ant dir=index-static target=deploy/
  ant dir=languageidentifier target=deploy/
  ant dir=lib-http target=deploy/
  ant dir=lib-nekohtml target=deploy/
@@ -101,6 +102,7 @@
 ant dir=index-basic target=clean/
 ant dir=index-anchor target=clean/
 ant dir=index-more target=clean/
+ant dir=index-static target=clean/
 ant dir=languageidentifier target=clean/
 ant dir=lib-commons-httpclient target=clean/
 ant dir=lib-http target=clean/

Added: nutch/branches/branch-1.4/src/plugin/index-static/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/index-static/build.xml?rev=1167651view=auto
==
--- nutch/branches/branch-1.4/src/plugin/index-static/build.xml (added)
+++ nutch/branches/branch-1.4/src/plugin/index-static/build.xml Sat Sep 10 
23:46:00 2011
@@ -0,0 +1,22 @@
+?xml version=1.0?
+!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the License); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed

svn commit: r1161287 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/

2011-08-24 Thread lewismc
Author: lewismc
Date: Wed Aug 24 21:31:07 2011
New Revision: 1161287

URL: http://svn.apache.org/viewvc?rev=1161287&view=rev
Log:
commit to address NUTCH-1095

Modified:
nutch/site/forrest/src/documentation/content/xdocs/site.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1161287r1=1161286r2=1161287view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Wed Aug 24 
21:31:07 2011
@@ -46,7 +46,6 @@ See http://forrest.apache.org/docs/linki
 wikilabel=Wiki href=ext:wiki /
 tutoriallabel=Tutorial href=ext:tutorial /
 webmasters  label=Robothref=bot.html /
-i18nlabel=i18n href=i18n.html /
 apidocs label=API Docs (1.3)href=apidocs-1.3/index.html/
 apidocs label=API Docs (1.2)href=apidocs-1.2/index.html/
 apidocs label=API Docs (nightly)href=ext:nightly-api /

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1161287r1=1161286r2=1161287view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Wed Aug 24 21:31:07 2011
@@ -183,9 +183,6 @@ document.write(Last Published:  + docu
 a href=bot.htmlRobot /a
 /div
 div class=menuitem
-a href=i18n.htmli18n/a
-/div
-div class=menuitem
 a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a
 /div
 div class=menuitem

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1161287r1=1161286r2=1161287view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1161287r1=1161286r2=1161287view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Wed Aug 24 21:31:07 2011
@@ -183,9 +183,6 @@ document.write(Last Published:  + docu
 div class=menupagetitleRobot /div
 /div
 div class=menuitem
-a href=i18n.htmli18n/a
-/div
-div class=menuitem
 a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a
 /div
 div class=menuitem

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1161287r1=1161286r2=1161287view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1161287r1=1161286r2=1161287view=diff
==
--- nutch/site/publish/credits.html (original)
+++ nutch/site/publish/credits.html Wed Aug 24 21:31:07 2011
@@ -183,9 +183,6 @@ document.write(Last Published:  + docu
 a href=bot.htmlRobot /a
 /div
 div class=menuitem
-a href=i18n.htmli18n/a
-/div
-div class=menuitem
 a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a
 /div
 div class=menuitem

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1161287r1=1161286r2=1161287view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/index.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/index.html?rev=1161287r1=1161286r2=1161287view=diff
==
--- nutch/site/publish/index.html (original)
+++ nutch/site/publish/index.html Wed Aug 24 21:31:07 2011
@@ -183,9 +183,6 @@ document.write(Last Published:  + docu
 a href=bot.htmlRobot /a
 /div
 div class=menuitem
-a href=i18n.htmli18n/a
-/div
-div class=menuitem
 a href=apidocs-1.3/index.htmlAPI Docs (1.3)/a
 /div
 div class=menuitem

Modified: nutch/site/publish/index.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish

svn commit: r1156683 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml

2011-08-11 Thread lewismc
Author: lewismc
Date: Thu Aug 11 17:20:06 2011
New Revision: 1156683

URL: http://svn.apache.org/viewvc?rev=1156683&view=rev
Log:
commit to address NUTCH-623 and NUTCH-914 as well as update to changes.txt

Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml
nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156683r1=1156682r2=1156683view=diff
==
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Thu Aug 11 17:20:06 2011
@@ -2,6 +2,10 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via 
jnioche)
+
+*NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier (lewismc)
+
 * NUTCH-1069 Readlinkdb broken on Hadoop  0.20 (markus)
 
 * NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have 
invalid scores (jnioche)

Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml?rev=1156683r1=1156682r2=1156683view=diff
==
--- nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml (original)
+++ nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml Thu Aug 
11 17:20:06 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=language-identifier default=jar-core
+project name=languageidentifier default=jar-core
 
   import file=../build-plugin.xml/
 

Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml?rev=1156683r1=1156682r2=1156683view=diff
==
--- nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml 
(original)
+++ nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Thu Aug 
11 17:20:06 2011
@@ -16,7 +16,7 @@
  limitations under the License.
 --
 plugin
-   id=language-identifier
+   id=languageidentifier
name=Language Identification Parser/Filter
version=1.0.0
provider-name=nutch.org




svn commit: r1156692 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml

2011-08-11 Thread lewismc
Author: lewismc
Date: Thu Aug 11 17:25:51 2011
New Revision: 1156692

URL: http://svn.apache.org/viewvc?rev=1156692&view=rev
Log:
commit to address NUTCH-623 and changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/languageidentifier/build.xml
nutch/trunk/src/plugin/languageidentifier/plugin.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1156692r1=1156691r2=1156692view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Aug 11 17:25:51 2011
@@ -2,6 +2,10 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-914 Implement Apache Project Branding Requirements (lewismc via 
jnioche)
+
+* NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier (lewismc)
+
 * NUTCH-1065 New mvn.template (lewismc)
 
 * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)

Modified: nutch/trunk/src/plugin/languageidentifier/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/build.xml?rev=1156692r1=1156691r2=1156692view=diff
==
--- nutch/trunk/src/plugin/languageidentifier/build.xml (original)
+++ nutch/trunk/src/plugin/languageidentifier/build.xml Thu Aug 11 17:25:51 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=language-identifier default=jar-core
+project name=languageidentifier default=jar-core
 
   import file=../build-plugin.xml/
 

Modified: nutch/trunk/src/plugin/languageidentifier/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=1156692r1=1156691r2=1156692view=diff
==
--- nutch/trunk/src/plugin/languageidentifier/plugin.xml (original)
+++ nutch/trunk/src/plugin/languageidentifier/plugin.xml Thu Aug 11 17:25:51 
2011
@@ -16,7 +16,7 @@
  limitations under the License.
 --
 plugin
-   id=language-identifier
+   id=languageidentifier
name=Language Identification Parser/Filter
version=1.0.0
provider-name=nutch.org




svn commit: r1156711 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml

2011-08-11 Thread lewismc
Author: lewismc
Date: Thu Aug 11 18:16:31 2011
New Revision: 1156711

URL: http://svn.apache.org/viewvc?rev=1156711&view=rev
Log:
reverting changes made by commit of NUTCH-623 as the patch breaks tests

Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml
nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1156711r1=1156710r2=1156711view=diff
==
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Thu Aug 11 18:16:31 2011
@@ -4,8 +4,6 @@ Release 1.4 - Current development
 
 * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via 
jnioche)
 
-*NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier (lewismc)
-
 * NUTCH-1069 Readlinkdb broken on Hadoop  0.20 (markus)
 
 * NUTCH-1044 Redirected URLs and possibly all of their outlinked URLs have 
invalid scores (jnioche)

Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml?rev=1156711r1=1156710r2=1156711view=diff
==
--- nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml (original)
+++ nutch/branches/branch-1.4/src/plugin/languageidentifier/build.xml Thu Aug 
11 18:16:31 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=languageidentifier default=jar-core
+project name=language-identifier default=jar-core
 
   import file=../build-plugin.xml/
 

Modified: nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml?rev=1156711r1=1156710r2=1156711view=diff
==
--- nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml 
(original)
+++ nutch/branches/branch-1.4/src/plugin/languageidentifier/plugin.xml Thu Aug 
11 18:16:31 2011
@@ -16,7 +16,7 @@
  limitations under the License.
 --
 plugin
-   id=languageidentifier
+   id=language-identifier
name=Language Identification Parser/Filter
version=1.0.0
provider-name=nutch.org




svn commit: r1156712 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/build.xml src/plugin/languageidentifier/plugin.xml

2011-08-11 Thread lewismc
Author: lewismc
Date: Thu Aug 11 18:18:27 2011
New Revision: 1156712

URL: http://svn.apache.org/viewvc?rev=1156712&view=rev
Log:
commit to revert changes by NUTCH-623 which broke tests.

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/languageidentifier/build.xml
nutch/trunk/src/plugin/languageidentifier/plugin.xml

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1156712r1=1156711r2=1156712view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Aug 11 18:18:27 2011
@@ -4,8 +4,6 @@ Release 2.0 - Current Development
 
 * NUTCH-914 Implement Apache Project Branding Requirements (lewismc via 
jnioche)
 
-* NUTCH-623 Change plugin source directory languageidentifier to 
language-identifier (lewismc)
-
 * NUTCH-1065 New mvn.template (lewismc)
 
 * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)

Modified: nutch/trunk/src/plugin/languageidentifier/build.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/build.xml?rev=1156712r1=1156711r2=1156712view=diff
==
--- nutch/trunk/src/plugin/languageidentifier/build.xml (original)
+++ nutch/trunk/src/plugin/languageidentifier/build.xml Thu Aug 11 18:18:27 2011
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 --
-project name=languageidentifier default=jar-core
+project name=language-identifier default=jar-core
 
   import file=../build-plugin.xml/
 

Modified: nutch/trunk/src/plugin/languageidentifier/plugin.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=1156712r1=1156711r2=1156712view=diff
==
--- nutch/trunk/src/plugin/languageidentifier/plugin.xml (original)
+++ nutch/trunk/src/plugin/languageidentifier/plugin.xml Thu Aug 11 18:18:27 
2011
@@ -16,7 +16,7 @@
  limitations under the License.
 --
 plugin
-   id=languageidentifier
+   id=language-identifier
name=Language Identification Parser/Filter
version=1.0.0
provider-name=nutch.org




svn commit: r1156101 - /nutch/trunk/doap.rdf

2011-08-10 Thread lewismc
Author: lewismc
Date: Wed Aug 10 10:47:24 2011
New Revision: 1156101

URL: http://svn.apache.org/viewvc?rev=1156101&view=rev
Log:
commit to address NUTCH-920 adding trunk 2.0 DOAP file to svn.

Added:
nutch/trunk/doap.rdf

Added: nutch/trunk/doap.rdf
URL: http://svn.apache.org/viewvc/nutch/trunk/doap.rdf?rev=1156101&view=auto
==
--- nutch/trunk/doap.rdf (added)
+++ nutch/trunk/doap.rdf Wed Aug 10 10:47:24 2011
@@ -0,0 +1,57 @@
+?xml version=1.0?
+?xml-stylesheet type=text/xsl?
+rdf:RDF xml:lang=en
+ xmlns=http://usefulinc.com/ns/doap#; 
+ xmlns:rdf=http://www.w3.org/1999/02/22-rdf-syntax-ns#; 
+ xmlns:asfext=http://projects.apache.org/ns/asfext#;
+ xmlns:foaf=http://xmlns.com/foaf/0.1/;
+!--
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the License); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+   
+ http://www.apache.org/licenses/LICENSE-2.0
+   
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an AS IS BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--
+  Project rdf:about=http://nutch.apache.org;
+created2011-07-21/created
+license rdf:resource=http://usefulinc.com/doap/licenses/asl20; /
+nameApache Nutch/name
+homepage rdf:resource=http://nutch.apache.org; /
+asfext:pmc rdf:resource=http://nutch.apache.org; /
+shortdescNutch 2.0 is a beeding edge (trunk) development of the Apache 
Nutch web search software./shortdesc
+descriptionApache Nutch 2.0 maintains a refined architecture by 
delegating searching, parsing, and data storage to other software projects. In 
particular the storage layer has been delegated to the object relational 
mapping framework Gora (Apache Incubator) enabling the focus of Nutch 2.0 to be 
entirely on web crawling. This logic promotes Nutch 2.0 as a simpler, focussed 
web crawler enabling easy integration with other resources. /description
+bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; /
+mailing-list 
rdf:resource=http://www.mail-archive.com/dev%40nutch.apache.org/; /
+download-page rdf:resource=http://svn.apache.org/repos/asf/nutch/trunk/; 
/
+programming-languageJava/programming-language
+category rdf:resource=http://projects.apache.org/category/web-framework; 
/
+release
+  Version
+nameNutch 2.0 Trunk/name
+createdtbc/created
+revision2.0/revision
+  /Version
+/release
+repository
+  SVNRepository
+location 
rdf:resource=https://svn.apache.org/repos/asf/nutch/trunk//
+browse rdf:resource=http://svn.apache.org/viewvc/nutch/trunk//
+  /SVNRepository
+/repository
+maintainer
+  foaf:Person
+foaf:nameNutch PMC/foaf:name
+  foaf:mbox rdf:resource=mailto:d...@nutch.apache.org/
+  /foaf:Person
+/maintainer
+  /Project
+/rdf:RDF




svn commit: r1153833 - in /nutch/trunk: CHANGES.txt conf/domain-urlfilter.txt ivy/mvn.template

2011-08-04 Thread lewismc
Author: lewismc
Date: Thu Aug  4 10:24:04 2011
New Revision: 1153833

URL: http://svn.apache.org/viewvc?rev=1153833&view=rev
Log:
commit to address NUTCH-1065 - New mvn.template and update of changes.txt

Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/domain-urlfilter.txt
nutch/trunk/ivy/mvn.template

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1153833r1=1153832r2=1153833view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Aug  4 10:24:04 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-1065 New mvn.template (lewismc)
+
 * NUTCH-1045 MimeUtil to rely on default config provided by Tika (jnioche)
 
 * NUTCH-1037 Option to deduplicate anchors prior to indexing (markus)

Modified: nutch/trunk/conf/domain-urlfilter.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/domain-urlfilter.txt?rev=1153833r1=1153832r2=1153833view=diff
==
--- nutch/trunk/conf/domain-urlfilter.txt (original)
+++ nutch/trunk/conf/domain-urlfilter.txt Thu Aug  4 10:24:04 2011
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# config file for urlfilter-domsin plugin
\ No newline at end of file
+# config file for urlfilter-domain plugin

Modified: nutch/trunk/ivy/mvn.template
URL: 
http://svn.apache.org/viewvc/nutch/trunk/ivy/mvn.template?rev=1153833r1=1153832r2=1153833view=diff
==
--- nutch/trunk/ivy/mvn.template (original)
+++ nutch/trunk/ivy/mvn.template Thu Aug  4 10:24:04 2011
@@ -64,11 +64,6 @@
emailjnio...@apache.org/email
/developer
developer
-   idotis/id
-   nameOtis Gospodnetić/name
-   emailo...@apache.org/email
-   /developer
-   developer
idsiren/id
nameSami Siren/name
emailsi...@apache.org/email
@@ -83,6 +78,11 @@
 nameAlexis Detlegrode/name
 emailale...@apache.org/email
 /developer
+   developer
+   idlewismc/id
+   nameLewis John McGibbney/name
+   emaillewi...@apache.org/email
+   /developer
/developers
 build
   sourceDirectorysrc/java/sourceDirectory




svn commit: r1153108 - in /nutch/site: forrest/src/documentation/content/xdocs/ publish/ publish/skin/images/

2011-08-02 Thread lewismc
Author: lewismc
Date: Tue Aug  2 12:43:01 2011
New Revision: 1153108

URL: http://svn.apache.org/viewvc?rev=1153108&view=rev
Log:
commit to address NUTCH-917

Modified:
nutch/site/forrest/src/documentation/content/xdocs/index.xml
nutch/site/forrest/src/documentation/content/xdocs/site.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.html
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png

nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1153108r1=1153107r2=1153108view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Aug  2 
12:43:01 2011
@@ -22,7 +22,7 @@
   header 
 titleWelcome to Apache Nutch#174;/title 
   abstractApache Nutch is an open source web-search software project. 
Nutch is a project of the a href=http://www.apache.org/;Apache Software 
Foundation/a
-and is part of the larger Apache community of developers and users. 
More about Nutch can be found a href=about.htmlhere/a.
+and is part of the larger Apache community of developers and users. 
More about Nutch can be found a href=./about.htmlhere./a
   /abstract
   /header 
 

Modified: nutch/site/forrest/src/documentation/content/xdocs/site.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/site.xml?rev=1153108r1=1153107r2=1153108view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/site.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/site.xml Tue Aug  2 
12:43:01 2011
@@ -34,9 +34,11 @@ See http://forrest.apache.org/docs/linki
 newslabel=News href=index.html /
 about   label=Abouthref=about.html /
 credits label=Credits  href=credits.html /
+thanks label=Thanks  href=ext:thanks /  
 store   label=Buy Stuffhref=ext:store /
 sponsorlabel=Sponsorship  href=ext:sponsor /
 licenselabel=License  href=ext:license / 
+security label=Security  href=ext:security /  
   /project
 
   docs label=Documentation
@@ -79,6 +81,8 @@ See http://forrest.apache.org/docs/linki
 release   href=http://www.apache.org/dyn/closer.cgi/nutch//
 license   href=http://www.apache.org/licenses//
 sponsor   href=http://www.apache.org/foundation/sponsorship.html; /
+thanks   href=http://www.apache.org/foundation/thanks.html; /
+security   href=http://www.apache.org/security/; /
   /external-refs
  
 /site

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1153108r1=1153107r2=1153108view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Tue Aug  2 12:43:01 2011
@@ -153,6 +153,9 @@ document.write(Last Published:  + docu
 a href=credits.htmlCredits/a
 /div
 div class=menuitem
+a href=http://www.apache.org/foundation/thanks.html;Thanks/a
+/div
+div class=menuitem
 a href=http://www.cafepress.com/nutch/;Buy Stuff/a
 /div
 div class=menuitem
@@ -161,6 +164,9 @@ document.write(Last Published:  + docu
 div class=menuitem
 a href=http://www.apache.org/licenses/;License/a
 /div

svn commit: r1149508 - in /nutch/site: forrest/src/documentation/resources/images/nutch_logo_tm.gif publish/images/nutch_logo_tm.gif

2011-07-22 Thread lewismc
Author: lewismc
Date: Fri Jul 22 09:18:11 2011
New Revision: 1149508

URL: http://svn.apache.org/viewvc?rev=1149508&view=rev
Log:
new Nutch 'tm' logo commit

Added:
nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif   
(with props)
nutch/site/publish/images/nutch_logo_tm.gif   (with props)

Added: nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif?rev=1149508view=auto
==
Binary file - no diff available.

Propchange: 
nutch/site/forrest/src/documentation/resources/images/nutch_logo_tm.gif
--
svn:mime-type = application/octet-stream

Added: nutch/site/publish/images/nutch_logo_tm.gif
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/images/nutch_logo_tm.gif?rev=1149508view=auto
==
Binary file - no diff available.

Propchange: nutch/site/publish/images/nutch_logo_tm.gif
--
svn:mime-type = application/octet-stream




svn commit: r1149641 - /nutch/branches/branch-1.4/conf/domain-urlfilter.txt

2011-07-22 Thread lewismc
Author: lewismc
Date: Fri Jul 22 15:50:06 2011
New Revision: 1149641

URL: http://svn.apache.org/viewvc?rev=1149641&view=rev
Log:
commit to address NUTCH-1066 - very trivial update of domain-urlfilter.txt

Modified:
nutch/branches/branch-1.4/conf/domain-urlfilter.txt

Modified: nutch/branches/branch-1.4/conf/domain-urlfilter.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/conf/domain-urlfilter.txt?rev=1149641r1=1149640r2=1149641view=diff
==
--- nutch/branches/branch-1.4/conf/domain-urlfilter.txt (original)
+++ nutch/branches/branch-1.4/conf/domain-urlfilter.txt Fri Jul 22 15:50:06 2011
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# config file for urlfilter-domsin plugin
\ No newline at end of file
+# config file for urlfilter-domain plugin




svn commit: r1149263 - in /nutch/site: forrest/src/documentation/skinconf.xml publish/doap.rdf

2011-07-21 Thread lewismc
Author: lewismc
Date: Thu Jul 21 16:22:58 2011
New Revision: 1149263

URL: http://svn.apache.org/viewvc?rev=1149263&view=rev
Log:
commit to address NUTCH-919 and NUTCH-920, hopefully this resolves NUTCH-914 
for the time being.

Modified:
nutch/site/forrest/src/documentation/skinconf.xml
nutch/site/publish/doap.rdf

Modified: nutch/site/forrest/src/documentation/skinconf.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149263r1=1149262r2=1149263view=diff
==
--- nutch/site/forrest/src/documentation/skinconf.xml (original)
+++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 16:22:58 2011
@@ -68,7 +68,7 @@ which will be used to configure the chos
   project-nameNutch/project-name
   project-descriptionOpen Source Web Search Software/project-description
   project-urlhttp://nutch.apache.org//project-url
-  project-logoimages/nutch-logo.gif/project-logo
+  project-logoimages/nutch-logo.png/project-logo
 
   !-- group logo --
   group-nameApache/group-name

Modified: nutch/site/publish/doap.rdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/doap.rdf?rev=1149263&r1=1149262&r2=1149263&view=diff
==
--- nutch/site/publish/doap.rdf (original)
+++ nutch/site/publish/doap.rdf Thu Jul 21 16:22:58 2011
@@ -6,31 +6,45 @@
  xmlns:asfext=http://projects.apache.org/ns/asfext#;
  xmlns:foaf=http://xmlns.com/foaf/0.1/;
 !--
-  ===
-
-   Copyright (c) 2006 The Apache Software Foundation.  
-   All rights reserved.
-
-  ===
+Licensed to the Apache Software Foundation (ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the License); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+   
+ http://www.apache.org/licenses/LICENSE-2.0
+   
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an AS IS BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
 --
-  Project rdf:about=http://nutch.apache.org/;
-created2006-02-28/created
+  Project rdf:about=http://nutch.apache.org;
+created2011-07-21/created
 license rdf:resource=http://usefulinc.com/doap/licenses/asl20; /
 nameApache Nutch/name
-homepage rdf:resource=http://nutch.apache.org/; /
+homepage rdf:resource=http://nutch.apache.org; /
 asfext:pmc rdf:resource=http://nutch.apache.org; /
-shortdescNutch is the open-source search engine./shortdesc
-descriptionNutch is open source web-search software. It builds
-on Lucene Java and Hadoop, adding web-specifics, such as a
-crawler, a link-graph database, parsers for HTML and other
-document formats, etc.
-/description
+shortdescApache Nutch is an open source web-search software 
project./shortdesc
+descriptionApache Nutch is an open source web-search software project. 
Stemming from Apache Lucene, it now builds on Apache Solr adding web-specifics, 
such as a crawler, a link-graph database and parsing support handled by Apache 
Tika for HTML and and array other document formats.
+
+Apache Nutch can run on a single machine, but gains a lot of its strength from 
running in a Hadoop cluster
+
+The system can be enhanced (eg other document formats can be parsed) using a 
highly flexible, easily extensible and thoroughly maintained plugin 
infrastructure./description
 bug-database rdf:resource=http://issues.apache.org/jira/browse/NUTCH; /
 mailing-list rdf:resource=http://nutch.apache.org/mailing_lists.html; /
-download-page 
rdf:resource=http://www.apache.org/dyn/closer.cgi/lucene/nutch/; /
+download-page rdf:resource=http://www.apache.org/dyn/closer.cgi/nutch/; 
/
 programming-languageJava/programming-language
 category rdf:resource=http://projects.apache.org/category/web-framework; 
/
-wiki rdf:resource=http://wiki.apache.org/nutch//
+release
+  Version
+nameApache Nutch 1.3/name
+created2011-06-07/created
+revision1.3/revision
+  /Version
+/release
 release
   Version
 branchbranch-1.0/branch
@@ -73,14 +87,14 @@
 /release
 repository
   SVNRepository
-location rdf:resource=http://svn.apache.org/repos/asf/nutch//
-browse rdf:resource=http://svn.apache.org/viewcvs.cgi/nutch//
+location rdf:resource=https

svn commit: r1149267 - in /nutch/site/publish: ./ skin/images/

2011-07-21 Thread lewismc
Author: lewismc
Date: Thu Jul 21 16:29:04 2011
New Revision: 1149267

URL: http://svn.apache.org/viewvc?rev=1149267&view=rev
Log:
to incorporate new logo within site

Modified:
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.html
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png

nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149267r1=1149266r2=1149267view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Thu Jul 21 16:29:04 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149267r1=1149266r2=1149267view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149267r1=1149266r2=1149267view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Thu Jul 21 16:29:04 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149267r1=1149266r2=1149267view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149267r1=1149266r2=1149267view=diff
==
--- nutch/site/publish/credits.html (original)
+++ nutch/site/publish/credits.html Thu Jul 21 16:29:04 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149267r1=1149266r2=1149267view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/i18n.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149267r1=1149266r2=1149267view=diff
==
--- nutch/site/publish/i18n.html (original)
+++ nutch/site/publish/i18n.html Thu Jul 21 16:29:04 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http

svn commit: r1149269 - in /nutch/site: forrest/src/documentation/ publish/

2011-07-21 Thread lewismc
Author: lewismc
Date: Thu Jul 21 16:40:09 2011
New Revision: 1149269

URL: http://svn.apache.org/viewvc?rev=1149269&view=rev
Log:
changed logo to .gif file as .png is not accepted or liked by forrest I don't 
think.

Modified:
nutch/site/forrest/src/documentation/skinconf.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.html
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/forrest/src/documentation/skinconf.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149269r1=1149268r2=1149269view=diff
==
--- nutch/site/forrest/src/documentation/skinconf.xml (original)
+++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 16:40:09 2011
@@ -68,7 +68,7 @@ which will be used to configure the chos
   project-nameNutch/project-name
   project-descriptionOpen Source Web Search Software/project-description
   project-urlhttp://nutch.apache.org//project-url
-  project-logoimages/nutch-logo.png/project-logo
+  project-logoimages/nutch-logo-tm.gif/project-logo
 
   !-- group logo --
   group-nameApache/group-name

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149269r1=1149268r2=1149269view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Thu Jul 21 16:40:09 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149269r1=1149268r2=1149269view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149269r1=1149268r2=1149269view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Thu Jul 21 16:40:09 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149269r1=1149268r2=1149269view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149269r1=1149268r2=1149269view=diff
==
--- nutch/site/publish/credits.html (original)
+++ nutch/site/publish/credits.html Thu Jul 21 16:40:09 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo.png title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149269r1=1149268r2=1149269view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/i18n.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149269r1=1149268r2=1149269view=diff
==
--- nutch/site/publish/i18n.html (original)
+++ nutch/site/publish/i18n.html Thu Jul 21 16:40:09

svn commit: r1149280 - in /nutch/site: forrest/src/documentation/ publish/

2011-07-21 Thread lewismc
Author: lewismc
Date: Thu Jul 21 17:51:40 2011
New Revision: 1149280

URL: http://svn.apache.org/viewvc?rev=1149280&view=rev
Log:
rebuild to try and incorporate new logo...

Modified:
nutch/site/forrest/src/documentation/skinconf.xml
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.html
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/forrest/src/documentation/skinconf.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/skinconf.xml?rev=1149280r1=1149279r2=1149280view=diff
==
--- nutch/site/forrest/src/documentation/skinconf.xml (original)
+++ nutch/site/forrest/src/documentation/skinconf.xml Thu Jul 21 17:51:40 2011
@@ -68,7 +68,7 @@ which will be used to configure the chos
   project-nameNutch/project-name
   project-descriptionOpen Source Web Search Software/project-description
   project-urlhttp://nutch.apache.org//project-url
-  project-logoimages/nutch-logo-tm.gif/project-logo
+  project-logoimages/nutch_logo_tm.gif/project-logo
 
   !-- group logo --
   group-nameApache/group-name

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1149280r1=1149279r2=1149280view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Thu Jul 21 17:51:40 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1149280r1=1149279r2=1149280view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.html?rev=1149280r1=1149279r2=1149280view=diff
==
--- nutch/site/publish/bot.html (original)
+++ nutch/site/publish/bot.html Thu Jul 21 17:51:40 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1149280r1=1149279r2=1149280view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1149280r1=1149279r2=1149280view=diff
==
--- nutch/site/publish/credits.html (original)
+++ nutch/site/publish/credits.html Thu Jul 21 17:51:40 2011
@@ -39,7 +39,7 @@
 |start Project Logo
 +--
 div class=projectlogo
-a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch-logo-tm.gif title=Open Source Web Search Software/a
+a href=http://nutch.apache.org/;img class=logoImage alt=Nutch 
src=images/nutch_logo_tm.gif title=Open Source Web Search Software/a
 /div
 !--+
 |end Project Logo

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1149280r1=1149279r2=1149280view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/i18n.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/i18n.html?rev=1149280r1=1149279r2=1149280view=diff
==
--- nutch/site/publish/i18n.html (original)
+++ nutch/site/publish/i18n.html Thu Jul 21 17:51:40 2011
@@ -39,7 +39,7

svn commit: r1148482 - in /nutch/site/forrest/src/documentation/content/xdocs: about.xml bot.xml credits.xml index.xml mailing_lists.xml site.xml

2011-07-19 Thread lewismc
Author: lewismc
Date: Tue Jul 19 18:42:33 2011
New Revision: 1148482

URL: http://svn.apache.org/viewvc?rev=1148482&view=rev
Log:
commit to ensure site src has been committed alongside publish

Modified:
nutch/site/forrest/src/documentation/content/xdocs/about.xml
nutch/site/forrest/src/documentation/content/xdocs/bot.xml
nutch/site/forrest/src/documentation/content/xdocs/credits.xml
nutch/site/forrest/src/documentation/content/xdocs/index.xml
nutch/site/forrest/src/documentation/content/xdocs/mailing_lists.xml
nutch/site/forrest/src/documentation/content/xdocs/site.xml

Modified: nutch/site/forrest/src/documentation/content/xdocs/about.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/about.xml?rev=1148482r1=1148481r2=1148482view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/about.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/about.xml Tue Jul 19 
18:42:33 2011
@@ -20,24 +20,26 @@
 document 
 
   header 
-titleAbout Nutch/title 
+titleAbout Apache Nutch/title 
   /header 
 
   body 
 
 section
-  titleOverview/title pNutch is open source web-search
-  software.  It builds on a href=ext:luceneLucene and Solr/a,
-  adding web-specifics, such as a crawler, a link-graph database,
-  parsers for HTML and other document formats, etc./p
+  titleOverview/title pApache Nutch is an open source web-search
+  software project.  Stemming from a href=ext:luceneApache Lucene/a, 
it now builds 
+  on a href=ext:solrApache Solr/a adding web-specifics, such as a 
crawler, 
+  a link-graph database and parsing support handled by a 
href=ext:tikaApache Tika/a
+  for HTML and and array other document formats./p
 
-  pNutch can run on a single machine, but gains a lot of its
+  pApache Nutch can run on a single machine, but gains a lot of its
   strength from running in a a href=ext:hadoopHadoop cluster/a/p
 
   pThe system can be enhanced (eg other document formats can be 
-  parsed) using a plugin mechanism./p
+  parsed) using a highly flexible, easily extensible and thoroughly 
maintained
+   plugin infrastructure./p
 
-  pFor more information about Nutch, please see the a
+  pFor more information about Apache Nutch, please see the a
   href=ext:wikiNutch wiki./a/p
 
   

Modified: nutch/site/forrest/src/documentation/content/xdocs/bot.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/bot.xml?rev=1148482r1=1148481r2=1148482view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/bot.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/bot.xml Tue Jul 19 
18:42:33 2011
@@ -18,7 +18,7 @@
 document
 
 header
-titleNutch robot/title
+titleApache Nutch robot/title
 /header
 
 body

Modified: nutch/site/forrest/src/documentation/content/xdocs/credits.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/credits.xml?rev=1148482r1=1148481r2=1148482view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/credits.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/credits.xml Tue Jul 19 
18:42:33 2011
@@ -21,7 +21,7 @@
 document 
 
 header
-  titleNutch credits/title 
+  titleApache Nutch Credits/title 
 /header 
 
 body
@@ -35,9 +35,9 @@
   liDogacan Güney/li
   lia href=http://www.digitalpebble.com/;Julien Nioche/a/li
   lia href=http://openindex.io/;Markus Jelsma/a/li
-  lia href=http://www.sematext.com/;Otis Gospodnetić/a/li
   lia href=http://people.apache.org/~siren;Sami Siren/a/li
   lia href=http://techvineyard.blogspot.com/;Alexis de Tréglodé/a/li
+  liLewis John McGibbney/li
 /ul
 /section
 
@@ -49,6 +49,7 @@
   liJohn Xing/li
   lia href=http://www.eecs.umich.edu/~michjc/;Mike Cafarella/a/li
   liPiotr Kosiorowski/li
+  lia href=http://www.sematext.com/;Otis Gospodnetić/a/li
 /ul
 /section
 
@@ -76,6 +77,9 @@
 
   lia href=http://www.archive.org/;The Internet Archive/a
   hosts some Nutch work./li
+
+  liWe would also like to acknowledge the a 
href=http://www.apache.org/foundation/thanks.html;Apache 
+Software Foundation Sponsors/a./li
 /ul
 /section
 

Modified: nutch/site/forrest/src/documentation/content/xdocs/index.xml
URL: 
http://svn.apache.org/viewvc/nutch/site/forrest/src/documentation/content/xdocs/index.xml?rev=1148482r1=1148481r2=1148482view=diff
==
--- nutch/site/forrest/src/documentation/content/xdocs/index.xml (original)
+++ nutch/site/forrest/src/documentation/content/xdocs/index.xml Tue Jul 19 
18:42:33 2011
@@ -20,13 +20,17 @@
 document 
 
   header 
-titleWelcome to Nutch

svn commit: r1147813 - in /nutch/branches/branch-1.4: CHANGES.txt src/bin/nutch

2011-07-18 Thread lewismc
Author: lewismc
Date: Mon Jul 18 11:22:52 2011
New Revision: 1147813

URL: http://svn.apache.org/viewvc?rev=1147813&view=rev
Log:
commit to resolve and close NUTCH-1059 and changes.txt

Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/bin/nutch

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147813&r1=1147812&r2=1147813&view=diff
==
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 18 11:22:52 2011
@@ -2,6 +2,12 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1059 Remove convdb command from /bin/nutch (lewismc)
+
+* NUTCH-1019 Edit comment in org.apache.nutch.crawl.Crawl to reflect removal 
of legacy (lewismc)
+
+* NUTCH-1023 Trivial error in error message for 
org.apache.nutch.crawl.LinkDbReader (lewismc)
+
 * NUTCH-1043 Add pattern for filtering .js in default url filters (jnioche)
 
 * NUTCH-1054 LinkDB optional during indexing (jnioche)

Modified: nutch/branches/branch-1.4/src/bin/nutch
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/bin/nutch?rev=1147813&r1=1147812&r2=1147813&view=diff
==
--- nutch/branches/branch-1.4/src/bin/nutch (original)
+++ nutch/branches/branch-1.4/src/bin/nutch Mon Jul 18 11:22:52 2011
@@ -49,7 +49,6 @@ if [ $# = 0 ]; then
   echo where COMMAND is one of:
   echo   crawl one-step crawler for intranets
   echo   readdbread / dump crawl db
-  echo   convdbconvert crawl db from pre-0.9 format
   echo   mergedb   merge crawldb-s, with optional filtering
   echo   readlinkdbread / dump link db
   echo   injectinject new urls into the database
@@ -206,8 +205,6 @@ elif [ $COMMAND = parse ] ; then
   CLASS=org.apache.nutch.parse.ParseSegment
 elif [ $COMMAND = readdb ] ; then
   CLASS=org.apache.nutch.crawl.CrawlDbReader
-elif [ $COMMAND = convdb ] ; then
-  CLASS=org.apache.nutch.tools.compat.CrawlDbConverter
 elif [ $COMMAND = mergedb ] ; then
   CLASS=org.apache.nutch.crawl.CrawlDbMerger
 elif [ $COMMAND = readlinkdb ] ; then




svn commit: r1147815 - in /nutch/branches/branch-1.4: CHANGES.txt src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html

2011-07-18 Thread lewismc
Author: lewismc
Date: Mon Jul 18 11:34:47 2011
New Revision: 1147815

URL: http://svn.apache.org/viewvc?rev=1147815&view=rev
Log:
commit and close for NUTCH-1055 and changes.txt

Modified:
nutch/branches/branch-1.4/CHANGES.txt

nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147815&r1=1147814&r2=1147815&view=diff
==
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 18 11:34:47 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1055 upgrade package.html file in language identifier plugin (lewismc)
+
 * NUTCH-1059 Remove convdb command from /bin/nutch (lewismc)
 
 * NUTCH-1019 Edit comment in org.apache.nutch.crawl.Crawl to reflect removal 
of legacy (lewismc)

Modified: 
nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html?rev=1147815r1=1147814r2=1147815view=diff
==
--- 
nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
 (original)
+++ 
nutch/branches/branch-1.4/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
 Mon Jul 18 11:34:47 2011
@@ -1,6 +1,6 @@
 html
 body
 pText document language identifier./ppLanguage profiles are based on 
material from
-a 
href=http://www.isi.edu/~koehn/europarl/;http://www.isi.edu/~koehn/europarl//a./p
+a 
href=http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/;http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps//a./p
 /body
 /html




svn commit: r1147817 - in /nutch/trunk: CHANGES.txt src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html

2011-07-18 Thread lewismc
Author: lewismc
Date: Mon Jul 18 11:43:26 2011
New Revision: 1147817

URL: http://svn.apache.org/viewvc?rev=1147817&view=rev
Log:
commit and close of NUTCH-1055 and changes.txt, this commit does not affect 
functionality it is merely a hyperlink reference to the document used as the 
basis for the language identifier plugin

Modified:
nutch/trunk/CHANGES.txt

nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1147817&r1=1147816&r2=1147817&view=diff
==
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon Jul 18 11:43:26 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-1055 upgrade package.html file in language identifier plugin (lewismc)
+
 * NUTCH-1043 Add pattern for filtering .js in default url filters (jnioche)
 
 * NUTCH-1027 Degrade log level of `can't find rules for scope` (markus)

Modified: 
nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html?rev=1147817r1=1147816r2=1147817view=diff
==
--- 
nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
 (original)
+++ 
nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/package.html
 Mon Jul 18 11:43:26 2011
@@ -1,6 +1,6 @@
 html
 body
 pText document language identifier./ppLanguage profiles are based on 
material from
-a 
href=http://www.isi.edu/~koehn/europarl/;http://www.isi.edu/~koehn/europarl//a./p
+a 
href=http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps/;http://www.homepages.inf.ed.ac.uk/pkoehn/publications/europarl.ps//a./p
 /body
 /html




svn commit: r1147969 - in /nutch/site/publish: about.pdf bot.pdf credits.html credits.pdf i18n.pdf index.pdf issue_tracking.pdf linkmap.pdf mailing_lists.html mailing_lists.pdf nightly.pdf version_con

2011-07-18 Thread lewismc
Author: lewismc
Date: Mon Jul 18 16:59:18 2011
New Revision: 1147969

URL: http://svn.apache.org/viewvc?rev=1147969&view=rev
Log:
update to commit and close NUTCH-1048 and to move Otis from committer to former 
committer

Modified:
nutch/site/publish/about.pdf
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.pdf
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.pdf
nutch/site/publish/version_control.pdf

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.html?rev=1147969r1=1147968r2=1147969view=diff
==
--- nutch/site/publish/credits.html (original)
+++ nutch/site/publish/credits.html Mon Jul 18 16:59:18 2011
@@ -284,10 +284,6 @@ document.write(Last Published:  + docu
 /li
   
 li
-a href=http://www.sematext.com/;Otis Gospodnetić/a
-/li
-  
-li
 a href=http://people.apache.org/~siren;Sami Siren/a
 /li
   
@@ -301,7 +297,7 @@ document.write(Last Published:  + docu
 /div
 
 
-a name=N10042/aa name=Former+Committers/a
+a name=N1003D/aa name=Former+Committers/a
 h2 class=h3Former Committers/h2
 div class=section
 ul
@@ -321,6 +317,10 @@ document.write(Last Published:  + docu
 /li
   
 liPiotr Kosiorowski/li
+  
+li
+a href=http://www.sematext.com/;Otis Gospodnetić/a
+/li
 
 /ul
 /div

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/i18n.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/i18n.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/index.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/index.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/issue_tracking.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/issue_tracking.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/linkmap.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/linkmap.pdf?rev=1147969r1=1147968r2=1147969view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/mailing_lists.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/mailing_lists.html?rev=1147969r1=1147968r2=1147969view=diff
==
--- nutch/site/publish/mailing_lists.html (original)
+++ nutch/site/publish/mailing_lists.html Mon Jul 18 16:59:18 2011
@@ -283,11 +283,11 @@ document.write(Last Published:  + docu
 /li
 
 li
-a href=http://www.mail-archive.com/nutch-user%40lucene.apache.org/;Search 
Old List Archive/a
+a href=http://www.mail-archive.com/nutch-user%40lucene.apache.org/;Search 
Old List Archive (nu...@lucene.apache.org)/a
 /li
 
 li
-a href=http://nutch.apache.org/mail/user/;View List Archive/a
+a href=http://mail-archives.apache.org/mod_mbox/nutch-user/;View List 
Archive/a
 /li
   
 /ul
@@ -322,11 +322,11 @@ document.write(Last Published:  + docu
 /li
 
 li
-a href=http://www.mail-archive.com/nutch-dev%40lucene.apache.org/;Search 
Old List Archive/a
+a href=http://www.mail-archive.com/nutch-dev%40lucene.apache.org/;Search 
Old List Archive (nu...@lucene.apache.org)/a
 /li
 
 li
-a href=http://nutch.apache.org/mail/dev/;View List Archive/a
+a href=http://mail-archives.apache.org/mod_mbox/nutch-dev/;View List 
Archive/a
 /li
   
 /ul
@@ -357,11 +357,11 @@ document.write(Last Published:  + docu
 /li
 
 li
-a 
href=http://www.mail-archive.com/nutch-commits%40lucene.apache.org/;Search 
Old List Archive/a
+a 
href=http://www.mail-archive.com/nutch-commits%40lucene.apache.org/;Search 
Old List

svn commit: r1147712 - in /nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl: Crawl.java NUTCH-1019-crawl-comment.patch

2011-07-17 Thread lewismc
Author: lewismc
Date: Sun Jul 17 20:39:10 2011
New Revision: 1147712

URL: http://svn.apache.org/viewvc?rev=1147712&view=rev
Log:
commit to resolve and close NUTCH-1019, this commit does not affect any 
functionality but instead adds further minor comments to the code for the crawl 
class.

Added:

nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java?rev=1147712&r1=1147711&r2=1147712&view=diff
==
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java 
(original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/Crawl.java Sun 
Jul 17 20:39:10 2011
@@ -48,7 +48,8 @@ public class Crawl extends Configured im
   }
 
 
-  /* Perform complete crawling and indexing given a set of root urls. */
+  /* Perform complete crawling and indexing (to Solr) given a set of root urls 
and the -solr
+ parameter respectively. More information and Usage parameters can be 
found below. */
   public static void main(String args[]) throws Exception {
 Configuration conf = NutchConfiguration.create();
 int res = ToolRunner.run(conf, new Crawl(), args);

Added: 
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch
URL: 
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch?rev=1147712&view=auto
==
--- 
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch
 (added)
+++ 
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/NUTCH-1019-crawl-comment.patch
 Sun Jul 17 20:39:10 2011
@@ -0,0 +1,14 @@
+Index: Crawl.java
+===
+--- Crawl.java (revision 1147708)
++++ Crawl.java (working copy)
+@@ -48,7 +48,8 @@
+   }
+ 
+ 
+-  /* Perform complete crawling and indexing given a set of root urls. */
++  /* Perform complete crawling and indexing (to Solr) given a set of root 
urls and the -solr
++ parameter respectively. More information and Usage parameters can be 
found below. */
+   public static void main(String args[]) throws Exception {
+ Configuration conf = NutchConfiguration.create();
+ int res = ToolRunner.run(conf, new Crawl(), args);




svn commit: r1147268 - in /nutch/site/publish: ./ skin/images/

2011-07-15 Thread lewismc
Author: lewismc
Date: Fri Jul 15 18:16:55 2011
New Revision: 1147268

URL: http://svn.apache.org/viewvc?rev=1147268&view=rev
Log:
First commit action, rebuilding site to address various issues covered within 
NUTCH-914

Modified:
nutch/site/publish/about.html
nutch/site/publish/about.pdf
nutch/site/publish/bot.html
nutch/site/publish/bot.pdf
nutch/site/publish/credits.html
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.html
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.html
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.html
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.html
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.html
nutch/site/publish/nightly.pdf
nutch/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png

nutch/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png
nutch/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png
nutch/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png

nutch/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png
nutch/site/publish/version_control.html
nutch/site/publish/version_control.pdf

Modified: nutch/site/publish/about.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.html?rev=1147268r1=1147267r2=1147268view=diff
==
--- nutch/site/publish/about.html (original)
+++ nutch/site/publish/about.html Fri Jul 15 18:16:55 2011
@@ -3,9 +3,9 @@
 head
 META http-equiv=Content-Type content=text/html; charset=UTF-8
 meta content=Apache Forrest name=Generator
-meta name=Forrest-version content=0.9
+meta name=Forrest-version content=0.10-dev
 meta name=Forrest-skin-name content=nutch
-titleAbout Nutch/title
+titleAbout Apache Nutch/title
 link type=text/css href=skin/basic.css rel=stylesheet
 link media=screen type=text/css href=skin/screen.css rel=stylesheet
 link media=print type=text/css href=skin/print.css rel=stylesheet
@@ -155,6 +155,12 @@ document.write(Last Published:  + docu
 div class=menuitem
 a href=http://www.cafepress.com/nutch/;Buy Stuff/a
 /div
+div class=menuitem
+a href=http://www.apache.org/foundation/sponsorship.html;Sponsorship/a
+/div
+div class=menuitem
+a href=http://www.apache.org/licenses/;License/a
+/div
 /div
 div onclick=SwitchMenu('menu_1.2', 'skin/') id=menu_1.2Title 
class=menutitleDocumentation/div
 div id=menu_1.2 class=menuitemgroup
@@ -165,7 +171,7 @@ document.write(Last Published:  + docu
 a href=http://wiki.apache.org/nutch/;Wiki/a
 /div
 div class=menuitem
-a href=http://wiki.apache.org/nutch/NutchTutorial;Tutorial/a
+a href=http://wiki.apache.org/nutch/RunningNutchAndSolr;Tutorial/a
 /div
 div class=menuitem
 a href=bot.htmlRobot /a
@@ -235,7 +241,7 @@ document.write(Last Published:  + docu
 a class=dida href=about.pdfimg alt=PDF -icon 
src=skin/images/pdfdoc.gif class=skinbr
 PDF/a
 /div
-h1About Nutch/h1
+h1About Apache Nutch/h1
 div id=minitoc-area
 ul class=minitoc
 li
@@ -248,16 +254,18 @@ document.write(Last Published:  + docu
 a name=N1000E/aa name=Overview/a
 h2 class=h3Overview/h2
 div class=section
-pNutch is open source web-search
-  software.  It builds on a href=http://lucene.apache.org/java/;Lucene 
and Solr/a,
-  adding web-specifics, such as a crawler, a link-graph database,
-  parsers for HTML and other document formats, etc./p
-pNutch can run on a single machine, but gains a lot of its
+pApache Nutch is an open source web-search
+  software project.  Stemming from a 
href=http://lucene.apache.org/java/;Apache Lucene/a, it now builds 
+  on a href=http://lucene.apache.org/solr/;Apache Solr/a adding 
web-specifics, such as a crawler, 
+  a link-graph database and parsing support handled by a 
href=http://tika.apache.org/;Apache Tika/a
+  for HTML and and array other document formats./p
+pApache Nutch can run on a single machine, but gains a lot of its
   strength from running in a a href=http://hadoop.apache.org/;Hadoop 
cluster/a
 /p
 pThe system can be enhanced (eg other document formats can be 
-  parsed) using a plugin mechanism./p
-pFor more information about Nutch, please see the a 
href=http://wiki.apache.org/nutch/;Nutch wiki./a
+  parsed) using a highly flexible, easily extensible and thoroughly 
maintained
+   plugin infrastructure./p
+pFor more information about Apache Nutch

svn commit: r1147276 - in /nutch/site/publish: about.pdf bot.pdf credits.pdf i18n.pdf index.html index.pdf issue_tracking.pdf linkmap.pdf mailing_lists.pdf nightly.pdf version_control.pdf

2011-07-15 Thread lewismc
Author: lewismc
Date: Fri Jul 15 18:43:17 2011
New Revision: 1147276

URL: http://svn.apache.org/viewvc?rev=1147276&view=rev
Log:
Trivial aesthetic improvement

Modified:
nutch/site/publish/about.pdf
nutch/site/publish/bot.pdf
nutch/site/publish/credits.pdf
nutch/site/publish/i18n.pdf
nutch/site/publish/index.html
nutch/site/publish/index.pdf
nutch/site/publish/issue_tracking.pdf
nutch/site/publish/linkmap.pdf
nutch/site/publish/mailing_lists.pdf
nutch/site/publish/nightly.pdf
nutch/site/publish/version_control.pdf

Modified: nutch/site/publish/about.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/about.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/bot.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/bot.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/credits.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/credits.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/i18n.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/i18n.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/index.html
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/index.html?rev=1147276r1=1147275r2=1147276view=diff
==
--- nutch/site/publish/index.html (original)
+++ nutch/site/publish/index.html Fri Jul 15 18:43:17 2011
@@ -246,7 +246,7 @@ document.write(Last Published:  + docu
 /div
 h1Welcome to Apache Nutchreg;/h1
 div class=abstractApache Nutch is an open source web-search software 
project. Nutch is a project of the Apache Software Foundation
-and is part of the larger Apache community of developers and users. 
Apache NutchMore about Nutch can be found here.
+and is part of the larger Apache community of developers and users. 
More about Nutch can be found here.
   /div
 div id=minitoc-area
 ul class=minitoc

Modified: nutch/site/publish/index.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/index.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/issue_tracking.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/issue_tracking.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/linkmap.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/linkmap.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/mailing_lists.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/mailing_lists.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/nightly.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/nightly.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.

Modified: nutch/site/publish/version_control.pdf
URL: 
http://svn.apache.org/viewvc/nutch/site/publish/version_control.pdf?rev=1147276r1=1147275r2=1147276view=diff
==
Binary files - no diff available.




<    2   3   4   5   6   7