svn commit: r745446 - in /lucene/nutch/trunk/src: java/org/apache/nutch/util/ plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/ plugin/field-boost/src/java/org/apache/nutch/indexer/fie
Author: siren Date: Wed Feb 18 09:14:29 2009 New Revision: 745446 URL: http://svn.apache.org/viewvc?rev=745446view=rev Log: NUTCH-688 add missing headers, part 2 rest Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java lucene/nutch/trunk/src/plugin/response-json/src/java/org/apache/nutch/searcher/response/json/JSONResponseWriter.java lucene/nutch/trunk/src/plugin/response-xml/src/java/org/apache/nutch/searcher/response/xml/XMLResponseWriter.java lucene/nutch/trunk/src/plugin/scoring-link/src/java/org/apache/nutch/scoring/link/LinkAnalysisScoringFilter.java lucene/nutch/trunk/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java lucene/nutch/trunk/src/plugin/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java?rev=745446r1=745445r2=745446view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java Wed Feb 18 09:14:29 2009 @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.nutch.util; import java.io.DataInput; Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java?rev=745446r1=745445r2=745446view=diff == --- lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java Wed Feb 18 09:14:29 2009 @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.nutch.util; import java.util.Stack; Modified: lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java?rev=745446r1=745445r2=745446view=diff == --- lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java (original) +++ lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java Wed Feb 18 09:14:29 2009 @@ -1,3 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with +
svn commit: r745448 - /lucene/nutch/trunk/build.xml
Author: siren Date: Wed Feb 18 09:18:07 2009 New Revision: 745448 URL: http://svn.apache.org/viewvc?rev=745448&view=rev Log: NUTCH-687 add RAT, also check plugins Modified: lucene/nutch/trunk/build.xml Modified: lucene/nutch/trunk/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/build.xml?rev=745448&r1=745447&r2=745448&view=diff == --- lucene/nutch/trunk/build.xml (original) +++ lucene/nutch/trunk/build.xml Wed Feb 18 09:18:07 2009 @@ -624,7 +624,9 @@ <target name="rat-sources" depends="rat-sources-typedef" description="runs the tasks over src/java"> <rat:report xmlns:rat="antlib:org.apache.rat.anttasks"> - <fileset dir="src/java"> + <fileset dir="src"> + <include name="java/**/*"/> + <include name="plugin/**/src/**/*"/> </fileset> </rat:report> </target>
svn commit: r745499 - in /lucene/nutch/trunk: ./ src/plugin/lib-jakarta-poi/ src/plugin/lib-jakarta-poi/lib/ src/plugin/parse-msword/ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/ sr
Author: siren Date: Wed Feb 18 12:43:04 2009 New Revision: 745499 URL: http://svn.apache.org/viewvc?rev=745499view=rev Log: NUTCH-691 - Update jakarta poi jars to the most relevant version, contributed by Dmitry Lihachev Added: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar (with props) lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar (with props) Removed: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.0-alpha1-20050704.jar lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.0-alpha1-20050704.jar Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml lucene/nutch/trunk/src/plugin/parse-msword/build.xml lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/WordExtractor.java lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/chp/Word6CHPBinTable.java lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=745499r1=745498r2=745499view=diff == --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Wed Feb 18 12:43:04 2009 @@ -343,6 +343,9 @@ 128. NUTCH-631 - MoreIndexingFilter fails with NoSuchElementException (Stefan Will, siren) +129. NUTCH-691 - Update jakarta poi jars to the most relevant version + (Dmitry Lihachev via siren) + Release 0.9 - 2007-04-02 1. Changed log4j confiquration to log to stdout on commandline Added: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar?rev=745499view=auto == Binary file - no diff available. 
Propchange: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-3.5-beta4-20081128.jar -- svn:mime-type = application/octet-stream Added: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar?rev=745499view=auto == Binary file - no diff available. Propchange: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/lib/poi-scratchpad-3.5-beta4-20081128.jar -- svn:mime-type = application/octet-stream Modified: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml?rev=745499r1=745498r2=745499view=diff == --- lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml (original) +++ lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml Wed Feb 18 12:43:04 2009 @@ -29,10 +29,10 @@ provider-name=jakarta.apache.org runtime - library name=poi-3.0-alpha1-20050704.jar + library name=poi-3.5-beta4-20081128.jar export name=*/ /library - library name=poi-scratchpad-3.0-alpha1-20050704.jar + library name=poi-scratchpad-3.5-beta4-20081128.jar export name=*/ /library /runtime Modified: lucene/nutch/trunk/src/plugin/parse-msword/build.xml URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-msword/build.xml?rev=745499r1=745498r2=745499view=diff == --- lucene/nutch/trunk/src/plugin/parse-msword/build.xml (original) +++ lucene/nutch/trunk/src/plugin/parse-msword/build.xml Wed Feb 18 12:43:04 2009 @@ -44,7 +44,8 @@ !-- for junit test -- mkdir dir=${build.test}/data/ - copy file=sample/word95.doc todir=${build.test}/data/ - copy file=sample/word97.doc todir=${build.test}/data/ + copy todir=${build.test}/data +fileset dir=sample includes=*.doc / + /copy /project Modified: lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java URL: 
http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java?rev=745499&r1=745498&r2=745499&view=diff == --- lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/Word6Extractor.java
svn commit: r745517 - /lucene/nutch/trunk/contrib/web2/
Author: siren Date: Wed Feb 18 14:03:18 2009 New Revision: 745517 URL: http://svn.apache.org/viewvc?rev=745517&view=rev Log: remove web2 as agreed on nutch-dev Removed: lucene/nutch/trunk/contrib/web2/