svn commit: r745446 - in /lucene/nutch/trunk/src: java/org/apache/nutch/util/ plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/ plugin/field-boost/src/java/org/apache/nutch/indexer/fie

2009-02-18 Thread siren
Author: siren
Date: Wed Feb 18 09:14:29 2009
New Revision: 745446

URL: http://svn.apache.org/viewvc?rev=745446&view=rev
Log:
NUTCH-688 add missing headers, part 2 rest

Modified:

lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java

lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java

lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java

lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java

lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java

lucene/nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java

lucene/nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java

lucene/nutch/trunk/src/plugin/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java?rev=745446&r1=745445&r2=745446&view=diff
==
--- 
lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
 (original)
+++ 
lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
 Wed Feb 18 09:14:29 2009
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.util;
 
 import java.io.DataInput;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java?rev=745446&r1=745445&r2=745446&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java Wed Feb 
18 09:14:29 2009
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.util;
 
 import java.util.Stack;

Modified: 
lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java?rev=745446&r1=745445&r2=745446&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
 (original)
+++ 
lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
 Wed Feb 18 09:14:29 2009
@@ -1,3 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ 

svn commit: r745448 - /lucene/nutch/trunk/build.xml

2009-02-18 Thread siren
Author: siren
Date: Wed Feb 18 09:18:07 2009
New Revision: 745448

URL: http://svn.apache.org/viewvc?rev=745448&view=rev
Log:
NUTCH-687 add RAT, also check plugins

Modified:
lucene/nutch/trunk/build.xml

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/build.xml?rev=745448&r1=745447&r2=745448&view=diff
==
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Wed Feb 18 09:18:07 2009
@@ -624,7 +624,9 @@
   <target name="rat-sources" depends="rat-sources-typedef"
  description="runs the tasks over src/java">
 <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
-  <fileset dir="src/java">
+  <fileset dir="src">
+   <include name="java/**/*"/>
+   <include name="plugin/**/src/**/*"/>
   </fileset>
 </rat:report>
   </target>




svn commit: r745499 - in /lucene/nutch/trunk: ./ src/plugin/lib-jakarta-poi/ src/plugin/lib-jakarta-poi/lib/ src/plugin/parse-msword/ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/ sr

2009-02-18 Thread siren
Author: siren
Date: Wed Feb 18 12:43:04 2009
New Revision: 745499

URL: http://svn.apache.org/viewvc?rev=745499&view=rev
Log:
NUTCH-691 - Update jakarta poi jars to the most relevant version, contributed 
by Dmitry Lihachev

Added:

lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar   
(with props)

lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar
   (with props)
Removed:

lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.0-alpha1-20050704.jar

lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.0-alpha1-20050704.jar
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml
lucene/nutch/trunk/src/plugin/parse-msword/build.xml

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java

lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/chp/Word6CHPBinTable.java

lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=745499&r1=745498&r2=745499&view=diff
==
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Wed Feb 18 12:43:04 2009
@@ -343,6 +343,9 @@
 128. NUTCH-631 - MoreIndexingFilter fails with NoSuchElementException
  (Stefan Will, siren)
  
+129. NUTCH-691 - Update jakarta poi jars to the most relevant version
+ (Dmitry Lihachev via siren)
+
 Release 0.9 - 2007-04-02
 
  1. Changed log4j confiquration to log to stdout on commandline

Added: 
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar?rev=745499&view=auto
==
Binary file - no diff available.

Propchange: 
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar
--
svn:mime-type = application/octet-stream

Added: 
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar?rev=745499&view=auto
==
Binary file - no diff available.

Propchange: 
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar
--
svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml?rev=745499&r1=745498&r2=745499&view=diff
==
--- lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml Wed Feb 18 
12:43:04 2009
@@ -29,10 +29,10 @@
provider-name="jakarta.apache.org">
 
<runtime>
- <library name="poi-3.0-alpha1-20050704.jar">
+ <library name="poi-3.5-beta4-20081128.jar">
 <export name="*"/>
  </library>
- <library name="poi-scratchpad-3.0-alpha1-20050704.jar">
+ <library name="poi-scratchpad-3.5-beta4-20081128.jar">
 <export name="*"/>
  </library>
</runtime>

Modified: lucene/nutch/trunk/src/plugin/parse-msword/build.xml
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-msword/build.xml?rev=745499&r1=745498&r2=745499&view=diff
==
--- lucene/nutch/trunk/src/plugin/parse-msword/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-msword/build.xml Wed Feb 18 12:43:04 
2009
@@ -44,7 +44,8 @@
 
   <!-- for junit test -->
   <mkdir dir="${build.test}/data"/>
-  <copy file="sample/word95.doc" todir="${build.test}/data"/>
-  <copy file="sample/word97.doc" todir="${build.test}/data"/>
+  <copy todir="${build.test}/data">
+<fileset dir="sample" includes="*.doc" />
+  </copy>
 
 </project>

Modified: 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java
URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java?rev=745499&r1=745498&r2=745499&view=diff
==
--- 
lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java
 

svn commit: r745517 - /lucene/nutch/trunk/contrib/web2/

2009-02-18 Thread siren
Author: siren
Date: Wed Feb 18 14:03:18 2009
New Revision: 745517

URL: http://svn.apache.org/viewvc?rev=745517&view=rev
Log:
remove web2 as agreed on nutch-dev

Removed:
lucene/nutch/trunk/contrib/web2/