This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new 792ed2891 NUTCH-3015 Add more CI steps to GitHub master-build.yml (#790) 792ed2891 is described below commit 792ed28914f4beb2fb8b8ce28eebe17196c92af1 Author: Lewis John McGibbney <lewis.mcgibb...@gmail.com> AuthorDate: Fri Oct 27 15:04:22 2023 -0700 NUTCH-3015 Add more CI steps to GitHub master-build.yml (#790) --- .../{master-build.yml => dependency-check.yml} | 25 ++++----- .github/workflows/master-build.yml | 64 +++++++++++++++++----- .gitignore | 1 + build.xml | 52 +++++++++++++++--- .../dependency-check-suppressions.xml | 5 -- src/java/overview.html | 16 ++++++ .../creativecommons/conf/crawl-urlfilter.txt | 15 +++++ src/plugin/creativecommons/conf/nutch-site.xml | 16 ++++++ src/plugin/creativecommons/data/anchor.html | 16 ++++++ src/plugin/creativecommons/data/rdf.html | 16 ++++++ src/plugin/creativecommons/data/rel.html | 16 ++++++ src/plugin/creativecommons/ivy.xml | 1 - src/plugin/exchange-jexl/README.md | 17 ++++++ src/plugin/exchange-jexl/ivy.xml | 1 - src/plugin/feed/ivy.xml | 1 - src/plugin/headings/ivy.xml | 1 - src/plugin/index-anchor/ivy.xml | 1 - src/plugin/index-basic/ivy.xml | 1 - src/plugin/index-geoip/ivy.xml | 1 - src/plugin/index-geoip/plugin.xml | 1 + src/plugin/index-jexl-filter/ivy.xml | 1 - src/plugin/index-links/README.md | 17 ++++++ src/plugin/index-links/ivy.xml | 1 - src/plugin/index-metadata/ivy.xml | 1 - src/plugin/index-more/ivy.xml | 1 - src/plugin/index-replace/ivy.xml | 1 - .../index-replace/sample/testIndexReplace.html | 16 ++++++ src/plugin/index-static/ivy.xml | 1 - src/plugin/indexer-cloudsearch/README.md | 17 ++++++ src/plugin/indexer-cloudsearch/createCSDomain.sh | 15 +++++ src/plugin/indexer-csv/README.md | 17 ++++++ src/plugin/indexer-csv/ivy.xml | 1 - src/plugin/indexer-dummy/README.md | 17 ++++++ src/plugin/indexer-dummy/ivy.xml | 1 - src/plugin/indexer-elastic/README.md | 17 ++++++ .../{howto_upgrade_es.txt => howto_upgrade_es.md} | 17 ++++++ src/plugin/indexer-kafka/ivy.xml | 1 - src/plugin/indexer-opensearch-1x/README.md | 17 ++++++ ..._opensearch.txt => howto_upgrade_opensearch.md} | 17 ++++++ src/plugin/indexer-rabbit/README.md | 17 ++++++ src/plugin/indexer-rabbit/ivy.xml | 1 - src/plugin/indexer-solr/README.md | 17 ++++++ ...owto_upgrade_solr.txt => howto_upgrade_solr.md} | 17 ++++++ src/plugin/indexer-solr/ivy.xml | 25 +++++---- src/plugin/indexer-solr/plugin.xml | 26 +++++---- src/plugin/language-identifier/ivy.xml | 1 - src/plugin/lib-htmlunit/ivy.xml | 1 - src/plugin/lib-http/ivy.xml | 1 - src/plugin/lib-nekohtml/ivy.xml | 1 - src/plugin/lib-rabbitmq/ivy.xml | 1 - src/plugin/lib-regex-filter/ivy.xml | 1 - src/plugin/lib-selenium/README.md | 17 ++++++ .../howto_upgrade_selenium.md} | 42 +++++--------- src/plugin/lib-selenium/howto_upgrade_selenium.txt | 15 ----- src/plugin/lib-selenium/ivy.xml | 1 - src/plugin/lib-xml/ivy.xml | 1 - src/plugin/microformats-reltag/ivy.xml | 1 - src/plugin/mimetype-filter/ivy.xml | 1 - src/plugin/nutch-extensionpoints/ivy.xml | 1 - src/plugin/parse-ext/command | 15 +++++ src/plugin/parse-ext/ivy.xml | 1 - src/plugin/parse-html/ivy.xml | 1 - src/plugin/parse-js/ivy.xml | 1 - .../parse-js/sample/parse_embedded_js_test.html | 16 ++++++ src/plugin/parse-js/sample/parse_pure_js_test.js | 15 +++++ src/plugin/parse-metatags/ivy.xml | 1 - src/plugin/parse-metatags/sample/testMetatags.html | 16 ++++++ .../sample/testMultivalueMetatags.html | 16 ++++++ ...owto_upgrade_tika.txt => howto_upgrade_tika.md} | 17 ++++++ src/plugin/parse-tika/ivy.xml | 1 - src/plugin/parse-tika/sample/nutch.html | 16 ++++++ src/plugin/parse-zip/ivy.xml | 1 - src/plugin/parsefilter-debug/ivy.xml | 1 - src/plugin/parsefilter-naivebayes/ivy.xml | 1 - .../parsefilter-regex/data/regex-parsefilter.txt | 15 +++++ src/plugin/parsefilter-regex/ivy.xml | 1 - src/plugin/protocol-file/ivy.xml | 1 - .../protocol-file/sample/testprotocolfile.txt | 15 +++++ .../sample/testprotocolfile_(encoded).txt | 15 +++++ src/plugin/protocol-foo/ivy.xml | 1 - src/plugin/protocol-foo/plugin.xml | 1 - src/plugin/protocol-ftp/ivy.xml | 1 - src/plugin/protocol-htmlunit/ivy.xml | 1 - src/plugin/protocol-http/ivy.xml | 1 - src/plugin/protocol-httpclient/ivy.xml | 1 - src/plugin/protocol-interactiveselenium/README.md | 17 ++++++ src/plugin/protocol-interactiveselenium/ivy.xml | 1 - ..._upgrade_okhttp.txt => howto_upgrade_okhttp.md} | 17 ++++++ src/plugin/protocol-okhttp/ivy.xml | 1 - src/plugin/protocol-selenium/README.md | 17 ++++++ src/plugin/protocol-selenium/ivy.xml | 1 - src/plugin/publish-rabbitmq/ivy.xml | 1 - src/plugin/scoring-depth/ivy.xml | 1 - src/plugin/scoring-link/ivy.xml | 1 - src/plugin/scoring-metadata/ivy.xml | 1 - src/plugin/scoring-opic/ivy.xml | 1 - src/plugin/scoring-orphan/ivy.xml | 1 - src/plugin/scoring-similarity/ivy.xml | 1 - src/plugin/subcollection/ivy.xml | 1 - src/plugin/tld/ivy.xml | 1 - src/plugin/urlfilter-automaton/ivy.xml | 1 - src/plugin/urlfilter-domain/data/hosts.txt | 15 +++++ src/plugin/urlfilter-domain/ivy.xml | 1 - src/plugin/urlfilter-domaindenylist/data/hosts.txt | 15 +++++ src/plugin/urlfilter-domaindenylist/ivy.xml | 1 - src/plugin/urlfilter-fast/README.md | 16 ++++++ src/plugin/urlfilter-fast/ivy.xml | 1 - src/plugin/urlfilter-ignoreexempt/README.md | 17 ++++++ src/plugin/urlfilter-ignoreexempt/ivy.xml | 1 - src/plugin/urlfilter-prefix/ivy.xml | 1 - src/plugin/urlfilter-regex/ivy.xml | 1 - src/plugin/urlfilter-suffix/ivy.xml | 1 - src/plugin/urlfilter-validator/ivy.xml | 1 - src/plugin/urlmeta/ivy.xml | 1 - src/plugin/urlnormalizer-ajax/ivy.xml | 1 - src/plugin/urlnormalizer-basic/ivy.xml | 1 - src/plugin/urlnormalizer-host/data/hosts.txt | 15 +++++ src/plugin/urlnormalizer-host/ivy.xml | 1 - src/plugin/urlnormalizer-pass/ivy.xml | 1 - .../urlnormalizer-protocol/data/protocols.txt | 15 +++++ src/plugin/urlnormalizer-protocol/ivy.xml | 1 - src/plugin/urlnormalizer-querystring/ivy.xml | 1 - src/plugin/urlnormalizer-regex/ivy.xml | 1 - .../sample/regex-normalize-default.test | 15 +++++ .../sample/regex-normalize-scope1.test | 15 +++++ src/plugin/urlnormalizer-slash/data/slashes.txt | 15 +++++ src/plugin/urlnormalizer-slash/ivy.xml | 1 - src/test/crawl-tests.xml | 16 ++++++ src/test/filter-all.txt | 15 +++++ src/test/log4j.properties | 15 +++++ src/test/nutch-site.xml | 16 ++++++ .../fetch-test-site/dup_of_pagea.html | 16 ++++++ src/testresources/fetch-test-site/exception.html | 16 ++++++ src/testresources/fetch-test-site/index.html | 16 ++++++ .../fetch-test-site/nested_spider_trap.html | 16 ++++++ src/testresources/fetch-test-site/pagea.html | 16 ++++++ src/testresources/fetch-test-site/pageb.html | 16 ++++++ src/testresources/fetch-test-site/robots.txt | 14 +++++ 138 files changed, 1017 insertions(+), 177 deletions(-) diff --git a/.github/workflows/master-build.yml b/.github/workflows/dependency-check.yml similarity index 72% copy from .github/workflows/master-build.yml copy to .github/workflows/dependency-check.yml index ba1d470ec..f07f746a0 100644 --- a/.github/workflows/master-build.yml +++ b/.github/workflows/dependency-check.yml @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,28 +12,26 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# name: master pr build on: - push: - branches: [ master ] - pull_request: - branches: [ master ] + schedule: + - cron: '0 0 * * *' # every day at midnight jobs: - build: - runs-on: ubuntu-latest + dependency-check: strategy: matrix: - java: [ '11' ] - + java: ['11'] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.java }} - - name: Build with Ant - run: ant clean nightly javadoc -buildfile build.xml + distribution: 'temurin' + - name: Dependency check + run: ant clean dependency-check -buildfile build.xml diff --git a/.github/workflows/master-build.yml b/.github/workflows/master-build.yml index ba1d470ec..e0af58df0 100644 --- a/.github/workflows/master-build.yml +++ b/.github/workflows/master-build.yml @@ -1,4 +1,3 @@ -# # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. @@ -13,28 +12,67 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -name: master pr build +name: master pull request ci on: push: - branches: [ master ] + branches: [master] pull_request: - branches: [ master ] + types: [opened, synchronize, reopened] + branches: [master] jobs: - build: - runs-on: ubuntu-latest + javadoc: strategy: matrix: - java: [ '11' ] - + java: ['11'] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + - name: Javadoc + run: ant clean javadoc -buildfile build.xml + rat: + strategy: + matrix: + java: ['11'] + os: [ubuntu-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up JDK ${{ matrix.java }} + uses: actions/setup-java@v3 + with: + java-version: ${{ matrix.java }} + distribution: 'temurin' + - name: Run Apache Rat + run: ant clean run-rat -buildfile build.xml + - name: Cache unknown licenses + run: echo "UNKNOWN_LICENSES=$(sed -n 18p /home/runner/work/nutch/nutch/build/apache-rat-report.txt)" >> $GITHUB_ENV + - name: Versions + run: | + echo $UNKNOWN_LICENSES + - name: Fail if any unknown licenses + if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }} + run: exit 1 + test: + strategy: + matrix: + java: ['11'] + os: [ubuntu-latest, macos-latest] + runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK ${{ matrix.java }} - uses: actions/setup-java@v1 + uses: actions/setup-java@v3 with: java-version: ${{ matrix.java }} - - name: Build with Ant - run: ant clean nightly javadoc -buildfile build.xml + distribution: 'temurin' + - name: Test + run: ant clean test -buildfile build.xml diff --git a/.gitignore b/.gitignore index b46690852..12365dd0d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ csvindexwriter lib/spotbugs-* ivy/dependency-check-ant/* .gradle* +ivy/apache-rat-* diff --git a/build.xml b/build.xml index b44581405..dd9797302 100644 --- a/build.xml +++ b/build.xml @@ -38,7 +38,7 @@ <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" /> <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" /> - <property name="dependency-check-ant.version" value="7.1.1" /> + <property name="dependency-check-ant.version" value="8.4.2" /> <property name="dependency-check-ant.home" value="${ivy.dir}/dependency-check-ant" /> <property name="dependency-check-ant.jar" value="${dependency-check-ant.home}/dependency-check-ant.jar" /> @@ -48,7 +48,7 @@ <property name="spotbugs.home" value="${ivy.dir}/spotbugs-${spotbugs.version}" /> <property name="spotbugs.jar" value="${spotbugs.home}/lib/spotbugs-ant.jar" /> - <property name="apache-rat.version" value="0.14" /> + <property name="apache-rat.version" value="0.15" /> <property name="apache-rat.home" value="${ivy.dir}/apache-rat-${apache-rat.version}" /> <property name="apache-rat.jar" value="${apache-rat.home}/apache-rat-${apache-rat.version}.jar" /> @@ -640,13 +640,15 @@ </fileset> </path> - <target name="report-vulnerabilities" depends="jar, compile-plugins, dependency-check-ant-download" description="--> check dependencies for security vulnerabilities"> + <target name="dependency-check" depends="jar, compile-plugins, dependency-check-ant-download" description="--> check dependencies for security vulnerabilities"> <taskdef resource="dependency-check-taskdefs.properties"> <classpath refid="dependency-check-ant.path" /> </taskdef> <dependency-check projectname="${name}" reportoutputdirectory="${dependency-check-ant.home}" - reportformat="ALL"> + reportformat="ALL" + assemblyAnalyzerEnabled="false" + failBuildOnCVSS="1"> <suppressionfile path="${dependency-check-ant.home}/dependency-check-suppressions.xml" /> <retirejsFilter regex="copyright.*jeremy long" /> <fileset dir="${build.dir}"> @@ -1025,7 +1027,7 @@ <target name="apache-rat-download-unchecked" unless="apache-rat.jar.found" description="--> downloads the Apache Rat jar"> - <get src="https://www.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz" + <get src="https://archive.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz" dest="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" usetimestamp="false" /> <untar src="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" @@ -1035,8 +1037,8 @@ <delete file="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" /> </target> - <target name="rat-sources" depends="init, apache-rat-download" - description="--> runs RAT tasks over src/java"> + <target name="run-rat" depends="init, apache-rat-download" + description="--> runs Apache Rat on codebase"> <taskdef uri="antlib:org.apache.rat.anttasks" resource="org/apache/rat/anttasks/antlib.xml"> @@ -1047,8 +1049,40 @@ <rat:report reportFile="${build.dir}/apache-rat-report.txt"> <fileset dir="src"> - <include name="java/**/*"/> - <include name="plugin/**/src/**/*"/> + <include name="**"/> + <exclude name="plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/langmappings.properties"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/de.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/en.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/es.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fi.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fr.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/it.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/nl.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/pt.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/sv.test"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/test-referencial.txt"/> + <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/da.test"/> + <exclude name="plugin/parse-tika/sample/ootest.txt"/> + <exclude name="plugin/parse-tika/sample/test.rtf"/> + <exclude name="plugin/urlfilter-ignoreexempt/data/.donotdelete"/> + <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.rules"/> + <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.urls"/> + <exclude name="plugin/urlfilter-automaton/sample/IntranetCrawling.rules"/> + <exclude name="plugin/urlfilter-automaton/sample/IntranetCrawling.urls"/> + <exclude name="plugin/urlfilter-automaton/sample/WholeWebCrawling.rules"/> + <exclude name="plugin/urlfilter-automaton/sample/WholeWebCrawling.urls"/> + <exclude name="plugin/urlfilter-fast/sample/Benchmarks.urls"/> + <exclude name="plugin/urlfilter-fast/sample/fast-urlfilter-benchmark.txt"/> + <exclude name="plugin/urlfilter-fast/sample/fast-urlfilter-test.txt"/> + <exclude name="plugin/urlfilter-fast/sample/test.urls"/> + <exclude name="plugin/urlfilter-regex/sample/Benchmarks.rules"/> + <exclude name="plugin/urlfilter-regex/sample/Benchmarks.urls"/> + <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.rules"/> + <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.urls"/> + <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.rules"/> + <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.urls"/> + <exclude name="plugin/urlfilter-regex/sample/nutch1838.rules"/> + <exclude name="plugin/urlfilter-regex/sample/nutch1838.urls"/> </fileset> </rat:report> </target> diff --git a/ivy/dependency-check-ant/dependency-check-suppressions.xml b/ivy/dependency-check-ant/dependency-check-suppressions.xml index e7de8febb..a7f4ca16d 100644 --- a/ivy/dependency-check-ant/dependency-check-suppressions.xml +++ b/ivy/dependency-check-ant/dependency-check-suppressions.xml @@ -1,8 +1,3 @@ <?xml version="1.0" encoding="UTF-8"?> <suppressions xmlns="https://jeremylong.github.io/DependencyCheck/dependency-suppression.1.1.xsd"> - <suppress> - <notes>only applies to tika-server < 1.18</notes> - <gav regex="true">^org\.(apache\.tika:tika-(core|parsers)|gagravarr:vorbis-java-tika):.*$</gav> - <cve>CVE-2018-1335</cve> - </suppress> </suppressions> diff --git a/src/java/overview.html b/src/java/overview.html index 11321417b..3de53a7d2 100644 --- a/src/java/overview.html +++ b/src/java/overview.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>Apache Nutch</title> diff --git a/src/plugin/creativecommons/conf/crawl-urlfilter.txt b/src/plugin/creativecommons/conf/crawl-urlfilter.txt index 324617f07..eb6786e4b 100644 --- a/src/plugin/creativecommons/conf/crawl-urlfilter.txt +++ b/src/plugin/creativecommons/conf/crawl-urlfilter.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Creative Commnons crawl filter # Each non-comment, non-blank line contains a regular expression diff --git a/src/plugin/creativecommons/conf/nutch-site.xml b/src/plugin/creativecommons/conf/nutch-site.xml index e28e12a9a..4b343b2cc 100644 --- a/src/plugin/creativecommons/conf/nutch-site.xml +++ b/src/plugin/creativecommons/conf/nutch-site.xml @@ -1,5 +1,21 @@ <?xml version="1.0"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!-- Creative Commons' Nutch configuration --> diff --git a/src/plugin/creativecommons/data/anchor.html b/src/plugin/creativecommons/data/anchor.html index 90b522759..3267bc9ea 100755 --- a/src/plugin/creativecommons/data/anchor.html +++ b/src/plugin/creativecommons/data/anchor.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd"> <html> <head> diff --git a/src/plugin/creativecommons/data/rdf.html b/src/plugin/creativecommons/data/rdf.html index fb2c34dfe..60c27cc54 100755 --- a/src/plugin/creativecommons/data/rdf.html +++ b/src/plugin/creativecommons/data/rdf.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> diff --git a/src/plugin/creativecommons/data/rel.html b/src/plugin/creativecommons/data/rel.html index 413d52f86..3d11572d8 100755 --- a/src/plugin/creativecommons/data/rel.html +++ b/src/plugin/creativecommons/data/rel.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head> </head><body> diff --git a/src/plugin/creativecommons/ivy.xml b/src/plugin/creativecommons/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/creativecommons/ivy.xml +++ b/src/plugin/creativecommons/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/exchange-jexl/README.md b/src/plugin/exchange-jexl/README.md index 2d2024276..35a711b90 100644 --- a/src/plugin/exchange-jexl/README.md +++ b/src/plugin/exchange-jexl/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + exchange-jexl plugin for Nutch ============================== diff --git a/src/plugin/exchange-jexl/ivy.xml b/src/plugin/exchange-jexl/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/exchange-jexl/ivy.xml +++ b/src/plugin/exchange-jexl/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/feed/ivy.xml b/src/plugin/feed/ivy.xml index 7e3f4ede3..a7671307b 100644 --- a/src/plugin/feed/ivy.xml +++ b/src/plugin/feed/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/headings/ivy.xml b/src/plugin/headings/ivy.xml index a8d6b9d48..63007f93c 100644 --- a/src/plugin/headings/ivy.xml +++ b/src/plugin/headings/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-anchor/ivy.xml b/src/plugin/index-anchor/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/index-anchor/ivy.xml +++ b/src/plugin/index-anchor/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-basic/ivy.xml b/src/plugin/index-basic/ivy.xml index 673ea7f09..7bae19bb9 100644 --- a/src/plugin/index-basic/ivy.xml +++ b/src/plugin/index-basic/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-geoip/ivy.xml b/src/plugin/index-geoip/ivy.xml index 2eda5a63f..45a638819 100644 --- a/src/plugin/index-geoip/ivy.xml +++ b/src/plugin/index-geoip/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-geoip/plugin.xml b/src/plugin/index-geoip/plugin.xml index c4efadf94..dda1b6a7b 100644 --- a/src/plugin/index-geoip/plugin.xml +++ b/src/plugin/index-geoip/plugin.xml @@ -1,3 +1,4 @@ +<?xml version="1.0" encoding="UTF-8"?> <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-jexl-filter/ivy.xml b/src/plugin/index-jexl-filter/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/index-jexl-filter/ivy.xml +++ b/src/plugin/index-jexl-filter/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-links/README.md b/src/plugin/index-links/README.md index f25d1cf6d..ac0f071f4 100644 --- a/src/plugin/index-links/README.md +++ b/src/plugin/index-links/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-links plugin for Nutch ============================== diff --git a/src/plugin/index-links/ivy.xml b/src/plugin/index-links/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/index-links/ivy.xml +++ b/src/plugin/index-links/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-metadata/ivy.xml b/src/plugin/index-metadata/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/index-metadata/ivy.xml +++ b/src/plugin/index-metadata/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-more/ivy.xml b/src/plugin/index-more/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/index-more/ivy.xml +++ b/src/plugin/index-more/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-replace/ivy.xml b/src/plugin/index-replace/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/index-replace/ivy.xml +++ b/src/plugin/index-replace/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/index-replace/sample/testIndexReplace.html b/src/plugin/index-replace/sample/testIndexReplace.html index 0b90fc211..fb2ef03a5 100644 --- a/src/plugin/index-replace/sample/testIndexReplace.html +++ b/src/plugin/index-replace/sample/testIndexReplace.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>Testing the power of the index-replace plugin</title> diff --git a/src/plugin/index-static/ivy.xml b/src/plugin/index-static/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/index-static/ivy.xml +++ b/src/plugin/index-static/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/indexer-cloudsearch/README.md b/src/plugin/indexer-cloudsearch/README.md index 10b5daa90..a0609c0fb 100644 --- a/src/plugin/indexer-cloudsearch/README.md +++ b/src/plugin/indexer-cloudsearch/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + AWS CloudSearch plugin for Nutch ================================ diff --git a/src/plugin/indexer-cloudsearch/createCSDomain.sh b/src/plugin/indexer-cloudsearch/createCSDomain.sh index 24fb0156c..1cb8481fe 100644 --- a/src/plugin/indexer-cloudsearch/createCSDomain.sh +++ b/src/plugin/indexer-cloudsearch/createCSDomain.sh @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # example of domain configuration for CloudSearch DOMAIN="$1" diff --git a/src/plugin/indexer-csv/README.md b/src/plugin/indexer-csv/README.md index 80220974a..4d1288b19 100644 --- a/src/plugin/indexer-csv/README.md +++ b/src/plugin/indexer-csv/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-csv plugin for Nutch ============================ diff --git a/src/plugin/indexer-csv/ivy.xml b/src/plugin/indexer-csv/ivy.xml index 75b5d54e5..e7bf87546 100644 --- a/src/plugin/indexer-csv/ivy.xml +++ b/src/plugin/indexer-csv/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/indexer-dummy/README.md b/src/plugin/indexer-dummy/README.md index 2a4b2bd15..a7fa53009 100644 --- a/src/plugin/indexer-dummy/README.md +++ b/src/plugin/indexer-dummy/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-dummy plugin for Nutch ============================== diff --git a/src/plugin/indexer-dummy/ivy.xml b/src/plugin/indexer-dummy/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/indexer-dummy/ivy.xml +++ b/src/plugin/indexer-dummy/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/indexer-elastic/README.md b/src/plugin/indexer-elastic/README.md index 466762e1c..3dfd888ff 100644 --- a/src/plugin/indexer-elastic/README.md +++ b/src/plugin/indexer-elastic/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-elastic plugin for Nutch ================================ diff --git a/src/plugin/indexer-elastic/howto_upgrade_es.txt b/src/plugin/indexer-elastic/howto_upgrade_es.md similarity index 60% rename from src/plugin/indexer-elastic/howto_upgrade_es.txt rename to src/plugin/indexer-elastic/howto_upgrade_es.md index a8156444c..b57e0c02f 100644 --- a/src/plugin/indexer-elastic/howto_upgrade_es.txt +++ b/src/plugin/indexer-elastic/howto_upgrade_es.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + 1. Upgrade Elasticsearch dependency in src/plugin/indexer-elastic/ivy.xml 2. Upgrade the Elasticsearch specific dependencies in src/plugin/indexer-elastic/plugin.xml diff --git a/src/plugin/indexer-kafka/ivy.xml b/src/plugin/indexer-kafka/ivy.xml index 7bdd94324..9d605c50b 100644 --- a/src/plugin/indexer-kafka/ivy.xml +++ b/src/plugin/indexer-kafka/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/indexer-opensearch-1x/README.md b/src/plugin/indexer-opensearch-1x/README.md index 52e5844af..e5e76f0b6 100644 --- a/src/plugin/indexer-opensearch-1x/README.md +++ b/src/plugin/indexer-opensearch-1x/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-opensearch1x plugin for Nutch ================================ diff --git a/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md similarity index 60% rename from src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt rename to src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md index 072590044..c9b723ffc 100644 --- a/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt +++ b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + 1. Upgrade OpenSearch dependency in src/plugin/indexer-opensearch-1x/ivy.xml 2. Upgrade the OpenSearch specific dependencies in src/plugin/indexer-opensearch-1x/plugin.xml diff --git a/src/plugin/indexer-rabbit/README.md b/src/plugin/indexer-rabbit/README.md index 6ea09a915..8040cd6c7 100644 --- a/src/plugin/indexer-rabbit/README.md +++ b/src/plugin/indexer-rabbit/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-rabbit plugin for Nutch =============================== diff --git a/src/plugin/indexer-rabbit/ivy.xml b/src/plugin/indexer-rabbit/ivy.xml index dd450cf7f..d2daf91da 100644 --- a/src/plugin/indexer-rabbit/ivy.xml +++ b/src/plugin/indexer-rabbit/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/indexer-solr/README.md b/src/plugin/indexer-solr/README.md index c3a4601e1..3a27e4116 100644 --- a/src/plugin/indexer-solr/README.md +++ b/src/plugin/indexer-solr/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + indexer-solr plugin for Nutch ============================= diff --git a/src/plugin/indexer-solr/howto_upgrade_solr.txt b/src/plugin/indexer-solr/howto_upgrade_solr.md similarity index 58% rename from src/plugin/indexer-solr/howto_upgrade_solr.txt rename to src/plugin/indexer-solr/howto_upgrade_solr.md index b2a7eb5c8..905fb84a9 100644 --- a/src/plugin/indexer-solr/howto_upgrade_solr.txt +++ b/src/plugin/indexer-solr/howto_upgrade_solr.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + 1. Upgrade Solr dependency in src/plugin/indexer-solr/ivy.xml 2. Upgrade the Solr specific dependencies in src/plugin/indexer-solr/plugin.xml diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml index ce59942da..ab5fd72c7 100644 --- a/src/plugin/indexer-solr/ivy.xml +++ b/src/plugin/indexer-solr/ivy.xml @@ -1,15 +1,20 @@ <?xml version="1.0" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at -<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor - license agreements. See the NOTICE file distributed with this work for additional - information regarding copyright ownership. The ASF licenses this file to - You under the Apache License, Version 2.0 (the "License"); you may not use - this file except in compliance with the License. You may obtain a copy of - the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - OF ANY KIND, either express or implied. See the License for the specific - language governing permissions and limitations under the License. --> + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <ivy-module version="1.0"> <info organisation="org.apache.nutch" module="${ant.project.name}"> diff --git a/src/plugin/indexer-solr/plugin.xml b/src/plugin/indexer-solr/plugin.xml index f672ac9ed..21cc7d8bd 100644 --- a/src/plugin/indexer-solr/plugin.xml +++ b/src/plugin/indexer-solr/plugin.xml @@ -1,14 +1,20 @@ <?xml version="1.0" encoding="UTF-8"?> -<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor - license agreements. See the NOTICE file distributed with this work for additional - information regarding copyright ownership. The ASF licenses this file to - You under the Apache License, Version 2.0 (the "License"); you may not use - this file except in compliance with the License. You may obtain a copy of - the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required - by applicable law or agreed to in writing, software distributed under the - License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS - OF ANY KIND, either express or implied. See the License for the specific - language governing permissions and limitations under the License. --> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <plugin id="indexer-solr" name="SolrIndexWriter" version="1.0.0" provider-name="nutch.apache.org"> diff --git a/src/plugin/language-identifier/ivy.xml b/src/plugin/language-identifier/ivy.xml index 68e9ed76e..f64b97055 100644 --- a/src/plugin/language-identifier/ivy.xml +++ b/src/plugin/language-identifier/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-htmlunit/ivy.xml b/src/plugin/lib-htmlunit/ivy.xml index b03211667..795e6b335 100644 --- a/src/plugin/lib-htmlunit/ivy.xml +++ b/src/plugin/lib-htmlunit/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-http/ivy.xml b/src/plugin/lib-http/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/lib-http/ivy.xml +++ b/src/plugin/lib-http/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-nekohtml/ivy.xml b/src/plugin/lib-nekohtml/ivy.xml index 072fb05b9..32fcd8c4b 100644 --- a/src/plugin/lib-nekohtml/ivy.xml +++ b/src/plugin/lib-nekohtml/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-rabbitmq/ivy.xml b/src/plugin/lib-rabbitmq/ivy.xml index 1b6ceac37..8184530af 100644 --- a/src/plugin/lib-rabbitmq/ivy.xml +++ b/src/plugin/lib-rabbitmq/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-regex-filter/ivy.xml b/src/plugin/lib-regex-filter/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/lib-regex-filter/ivy.xml +++ b/src/plugin/lib-regex-filter/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-selenium/README.md b/src/plugin/lib-selenium/README.md index 1c6b37c5f..5054d7ad8 100644 --- a/src/plugin/lib-selenium/README.md +++ b/src/plugin/lib-selenium/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + # Updates * The use of phantomjs has been deprecated. Check [Wikipedia](https://en.wikipedia.org/wiki/PhantomJS) for more info. * The updated code for Safari webriver is under development as starting Safari 10 on OS X El Capitan and macOS Sierra, Safari comes bundled with a new driver implementation. diff --git a/src/plugin/index-geoip/plugin.xml b/src/plugin/lib-selenium/howto_upgrade_selenium.md similarity index 52% copy from src/plugin/index-geoip/plugin.xml copy to src/plugin/lib-selenium/howto_upgrade_selenium.md index c4efadf94..3071c74cb 100644 --- a/src/plugin/index-geoip/plugin.xml +++ b/src/plugin/lib-selenium/howto_upgrade_selenium.md @@ -14,31 +14,19 @@ See the License for the specific language governing permissions and limitations under the License. --> -<plugin - id="index-geoip" - name="GeoIP2 Indexing Filter" - version="1.0.0" - provider-name="nutch.org"> - - - <runtime> - <library name="index-geoip.jar"> - <export name="*"/> - </library> - <library name="geoip2-3.0.1.jar"/> - <library name="maxmind-db-2.0.0.jar"/> - </runtime> - - <requires> - <import plugin="nutch-extensionpoints"/> - </requires> - - <extension id="org.apache.nutch.indexer.geoip" - name="Nutch GeoIP2 Indexing Filter" - point="org.apache.nutch.indexer.IndexingFilter"> - <implementation id="GeoIPIndexingFilter" - class="org.apache.nutch.indexer.geoip.GeoIPIndexingFilter"/> - </extension> - -</plugin> +1. Upgrade various driver versions dependency in src/plugin/lib-selenium/ivy.xml + +2. Upgrade Selenium's own dependencies in src/plugin/lib-selenium/plugin.xml + + To get a list of dependencies and their versions execute: + $ ant -f ./build-ivy.xml + $ ls lib | sed 's/^/ <library name="/g' | sed 's/$/">\n <export name="*"\/>\n <\/library>/g' + + Note that all dependent libraries are exported for a "library" plugin ("lib-selenium"). + + N.B. The above Regex + Sed commands may not work if you are using MacOSX's Sed. In this instance you can instal GNU Sed as follows + + $ brew install gnu-sed --with-default-names + + You can then restart your terminal and the Regex + Sed command should work just fine! diff --git a/src/plugin/lib-selenium/howto_upgrade_selenium.txt b/src/plugin/lib-selenium/howto_upgrade_selenium.txt deleted file mode 100644 index 1892a6275..000000000 --- a/src/plugin/lib-selenium/howto_upgrade_selenium.txt +++ /dev/null @@ -1,15 +0,0 @@ -1. Upgrade various driver versions dependency in src/plugin/lib-selenium/ivy.xml - -2. Upgrade Selenium's own dependencies in src/plugin/lib-selenium/plugin.xml - - To get a list of dependencies and their versions execute: - $ ant -f ./build-ivy.xml - $ ls lib | sed 's/^/ <library name="/g' | sed 's/$/">\n <export name="*"\/>\n <\/library>/g' - - Note that all dependent libraries are exported for a "library" plugin ("lib-selenium"). - - N.B. The above Regex + Sed commands may not work if you are using MacOSX's Sed. In this instance you can instal GNU Sed as follows - - $ brew install gnu-sed --with-default-names - - You can then restart your terminal and the Regex + Sed command should work just fine! diff --git a/src/plugin/lib-selenium/ivy.xml b/src/plugin/lib-selenium/ivy.xml index 7d3a2d624..0d460cdb4 100644 --- a/src/plugin/lib-selenium/ivy.xml +++ b/src/plugin/lib-selenium/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/lib-xml/ivy.xml b/src/plugin/lib-xml/ivy.xml index 9306c4d9b..4e38c4371 100644 --- a/src/plugin/lib-xml/ivy.xml +++ b/src/plugin/lib-xml/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/microformats-reltag/ivy.xml b/src/plugin/microformats-reltag/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/microformats-reltag/ivy.xml +++ b/src/plugin/microformats-reltag/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/mimetype-filter/ivy.xml b/src/plugin/mimetype-filter/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/mimetype-filter/ivy.xml +++ b/src/plugin/mimetype-filter/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/nutch-extensionpoints/ivy.xml b/src/plugin/nutch-extensionpoints/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/nutch-extensionpoints/ivy.xml +++ b/src/plugin/nutch-extensionpoints/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-ext/command b/src/plugin/parse-ext/command index f42c05531..329d58d96 100644 --- a/src/plugin/parse-ext/command +++ b/src/plugin/parse-ext/command @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + #!/bin/bash # # Sample bash script as external command invoked by parse-ext plugin diff --git a/src/plugin/parse-ext/ivy.xml b/src/plugin/parse-ext/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/parse-ext/ivy.xml +++ b/src/plugin/parse-ext/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-html/ivy.xml b/src/plugin/parse-html/ivy.xml index 69aa2eba5..1424c4d7a 100644 --- a/src/plugin/parse-html/ivy.xml +++ b/src/plugin/parse-html/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-js/ivy.xml b/src/plugin/parse-js/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/parse-js/ivy.xml +++ b/src/plugin/parse-js/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-js/sample/parse_embedded_js_test.html b/src/plugin/parse-js/sample/parse_embedded_js_test.html index 351beacc3..0409bba53 100644 --- a/src/plugin/parse-js/sample/parse_embedded_js_test.html +++ b/src/plugin/parse-js/sample/parse_embedded_js_test.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html style="font-size: 16px;"><head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> diff --git a/src/plugin/parse-js/sample/parse_pure_js_test.js b/src/plugin/parse-js/sample/parse_pure_js_test.js index f196313f8..0e486a879 100644 --- a/src/plugin/parse-js/sample/parse_pure_js_test.js +++ b/src/plugin/parse-js/sample/parse_pure_js_test.js @@ -1,3 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // test data for link extraction from "pure" JavaScript function selectProvider(form) { diff --git a/src/plugin/parse-metatags/ivy.xml b/src/plugin/parse-metatags/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/parse-metatags/ivy.xml +++ b/src/plugin/parse-metatags/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-metatags/sample/testMetatags.html b/src/plugin/parse-metatags/sample/testMetatags.html index e9e8e6bd0..4dc86c194 100644 --- a/src/plugin/parse-metatags/sample/testMetatags.html +++ b/src/plugin/parse-metatags/sample/testMetatags.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <meta name="Keywords" content="This is a test of keywords" /> diff --git a/src/plugin/parse-metatags/sample/testMultivalueMetatags.html b/src/plugin/parse-metatags/sample/testMultivalueMetatags.html index ca8b737c2..36d2c8814 100644 --- a/src/plugin/parse-metatags/sample/testMultivalueMetatags.html +++ b/src/plugin/parse-metatags/sample/testMultivalueMetatags.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <meta name="DC.creator" content="Doug Cutting"> diff --git a/src/plugin/parse-tika/howto_upgrade_tika.txt b/src/plugin/parse-tika/howto_upgrade_tika.md similarity index 73% rename from src/plugin/parse-tika/howto_upgrade_tika.txt rename to src/plugin/parse-tika/howto_upgrade_tika.md index 46d075948..8ed6c3f3c 100644 --- a/src/plugin/parse-tika/howto_upgrade_tika.txt +++ b/src/plugin/parse-tika/howto_upgrade_tika.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + We are currently using a shim (https://github.com/tballison/hadoop-safe-tika because of binary conflicts in commons-io versions between what Hadoop supports and the more modern features that Apache Tika and Apache POI were using in commons-io. diff --git a/src/plugin/parse-tika/ivy.xml b/src/plugin/parse-tika/ivy.xml index 1586d9661..b89e812e1 100644 --- a/src/plugin/parse-tika/ivy.xml +++ b/src/plugin/parse-tika/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parse-tika/sample/nutch.html b/src/plugin/parse-tika/sample/nutch.html index 0aa7c9895..809853512 100644 --- a/src/plugin/parse-tika/sample/nutch.html +++ b/src/plugin/parse-tika/sample/nutch.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <html> <head> diff --git a/src/plugin/parse-zip/ivy.xml b/src/plugin/parse-zip/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/parse-zip/ivy.xml +++ b/src/plugin/parse-zip/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parsefilter-debug/ivy.xml b/src/plugin/parsefilter-debug/ivy.xml index dac80e6d7..82f93c012 100644 --- a/src/plugin/parsefilter-debug/ivy.xml +++ b/src/plugin/parsefilter-debug/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parsefilter-naivebayes/ivy.xml b/src/plugin/parsefilter-naivebayes/ivy.xml index c261adac6..66a931543 100644 --- a/src/plugin/parsefilter-naivebayes/ivy.xml +++ b/src/plugin/parsefilter-naivebayes/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/parsefilter-regex/data/regex-parsefilter.txt b/src/plugin/parsefilter-regex/data/regex-parsefilter.txt index 9d15cd899..fbc7dd303 100644 --- a/src/plugin/parsefilter-regex/data/regex-parsefilter.txt +++ b/src/plugin/parsefilter-regex/data/regex-parsefilter.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Example configuration file for parsefilter-regex # # Parse metadata field <name> is set to true if the HTML matches the regex. The diff --git a/src/plugin/parsefilter-regex/ivy.xml b/src/plugin/parsefilter-regex/ivy.xml index e82f92861..f33a31178 100644 --- a/src/plugin/parsefilter-regex/ivy.xml +++ b/src/plugin/parsefilter-regex/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-file/ivy.xml b/src/plugin/protocol-file/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/protocol-file/ivy.xml +++ b/src/plugin/protocol-file/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-file/sample/testprotocolfile.txt b/src/plugin/protocol-file/sample/testprotocolfile.txt index fbe8a8acf..5e684e2f4 100644 --- a/src/plugin/protocol-file/sample/testprotocolfile.txt +++ b/src/plugin/protocol-file/sample/testprotocolfile.txt @@ -1 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + Protocol File Test diff --git a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt index fbe8a8acf..5e684e2f4 100644 --- a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt +++ b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt @@ -1 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + Protocol File Test diff --git a/src/plugin/protocol-foo/ivy.xml b/src/plugin/protocol-foo/ivy.xml index 1a86d6803..99b173446 100755 --- a/src/plugin/protocol-foo/ivy.xml +++ b/src/plugin/protocol-foo/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-foo/plugin.xml b/src/plugin/protocol-foo/plugin.xml index d34f6242a..954a2d41a 100755 --- a/src/plugin/protocol-foo/plugin.xml +++ b/src/plugin/protocol-foo/plugin.xml @@ -1,5 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-ftp/ivy.xml b/src/plugin/protocol-ftp/ivy.xml index 8e1c257d6..1fbfe97f0 100644 --- a/src/plugin/protocol-ftp/ivy.xml +++ b/src/plugin/protocol-ftp/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-htmlunit/ivy.xml b/src/plugin/protocol-htmlunit/ivy.xml index dde1fe88f..fa787376b 100644 --- a/src/plugin/protocol-htmlunit/ivy.xml +++ b/src/plugin/protocol-htmlunit/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-http/ivy.xml b/src/plugin/protocol-http/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/protocol-http/ivy.xml +++ b/src/plugin/protocol-http/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-httpclient/ivy.xml b/src/plugin/protocol-httpclient/ivy.xml index 378bd7c42..e3e515dd9 100644 --- a/src/plugin/protocol-httpclient/ivy.xml +++ b/src/plugin/protocol-httpclient/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-interactiveselenium/README.md b/src/plugin/protocol-interactiveselenium/README.md index dd43ee794..545efb830 100644 --- a/src/plugin/protocol-interactiveselenium/README.md +++ b/src/plugin/protocol-interactiveselenium/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + Nutch Interactive Selenium ========================== diff --git a/src/plugin/protocol-interactiveselenium/ivy.xml b/src/plugin/protocol-interactiveselenium/ivy.xml index 506be0aec..112483bcd 100644 --- a/src/plugin/protocol-interactiveselenium/ivy.xml +++ b/src/plugin/protocol-interactiveselenium/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt b/src/plugin/protocol-okhttp/howto_upgrade_okhttp.md similarity index 52% rename from src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt rename to src/plugin/protocol-okhttp/howto_upgrade_okhttp.md index b3b6f1f22..16ae70d71 100644 --- a/src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt +++ b/src/plugin/protocol-okhttp/howto_upgrade_okhttp.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + 1. Upgrade OkHttp dependency in src/plugin/protocol-okhttp/ivy.xml 2. Upgrade OkHttp's own dependencies in src/plugin/protocol-okhttp/plugin.xml diff --git a/src/plugin/protocol-okhttp/ivy.xml b/src/plugin/protocol-okhttp/ivy.xml index ead823247..73b4fa636 100644 --- a/src/plugin/protocol-okhttp/ivy.xml +++ b/src/plugin/protocol-okhttp/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/protocol-selenium/README.md b/src/plugin/protocol-selenium/README.md index 05132b9ef..4d43c330d 100644 --- a/src/plugin/protocol-selenium/README.md +++ b/src/plugin/protocol-selenium/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + Nutch Selenium ============== diff --git a/src/plugin/protocol-selenium/ivy.xml b/src/plugin/protocol-selenium/ivy.xml index 506be0aec..112483bcd 100644 --- a/src/plugin/protocol-selenium/ivy.xml +++ b/src/plugin/protocol-selenium/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/publish-rabbitmq/ivy.xml b/src/plugin/publish-rabbitmq/ivy.xml index 7b5e3dd3c..008cdb1ca 100644 --- a/src/plugin/publish-rabbitmq/ivy.xml +++ b/src/plugin/publish-rabbitmq/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-depth/ivy.xml b/src/plugin/scoring-depth/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/scoring-depth/ivy.xml +++ b/src/plugin/scoring-depth/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-link/ivy.xml b/src/plugin/scoring-link/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/scoring-link/ivy.xml +++ b/src/plugin/scoring-link/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-metadata/ivy.xml b/src/plugin/scoring-metadata/ivy.xml index 24d76063d..6fa1a2c06 100644 --- a/src/plugin/scoring-metadata/ivy.xml +++ b/src/plugin/scoring-metadata/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-opic/ivy.xml b/src/plugin/scoring-opic/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/scoring-opic/ivy.xml +++ b/src/plugin/scoring-opic/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-orphan/ivy.xml b/src/plugin/scoring-orphan/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/scoring-orphan/ivy.xml +++ b/src/plugin/scoring-orphan/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/scoring-similarity/ivy.xml b/src/plugin/scoring-similarity/ivy.xml index 1acd1d442..1a1945f57 100644 --- a/src/plugin/scoring-similarity/ivy.xml +++ b/src/plugin/scoring-similarity/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/subcollection/ivy.xml b/src/plugin/subcollection/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/subcollection/ivy.xml +++ b/src/plugin/subcollection/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/tld/ivy.xml b/src/plugin/tld/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/tld/ivy.xml +++ b/src/plugin/tld/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-automaton/ivy.xml b/src/plugin/urlfilter-automaton/ivy.xml index 6b07ba33b..e9b1e892f 100644 --- a/src/plugin/urlfilter-automaton/ivy.xml +++ b/src/plugin/urlfilter-automaton/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-domain/data/hosts.txt b/src/plugin/urlfilter-domain/data/hosts.txt index 2b88c3b05..8cf43745f 100644 --- a/src/plugin/urlfilter-domain/data/hosts.txt +++ b/src/plugin/urlfilter-domain/data/hosts.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # comments start with the pound sign net apache.org diff --git a/src/plugin/urlfilter-domain/ivy.xml b/src/plugin/urlfilter-domain/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-domain/ivy.xml +++ b/src/plugin/urlfilter-domain/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-domaindenylist/data/hosts.txt b/src/plugin/urlfilter-domaindenylist/data/hosts.txt index 2b88c3b05..8cf43745f 100644 --- a/src/plugin/urlfilter-domaindenylist/data/hosts.txt +++ b/src/plugin/urlfilter-domaindenylist/data/hosts.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # comments start with the pound sign net apache.org diff --git a/src/plugin/urlfilter-domaindenylist/ivy.xml b/src/plugin/urlfilter-domaindenylist/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/urlfilter-domaindenylist/ivy.xml +++ b/src/plugin/urlfilter-domaindenylist/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-fast/README.md b/src/plugin/urlfilter-fast/README.md index 46b293fe8..2e5860575 100644 --- a/src/plugin/urlfilter-fast/README.md +++ b/src/plugin/urlfilter-fast/README.md @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> Filters URLs based on a file of regular expressions using host/domains matching first. The default policy is to accept a URL if no matches diff --git a/src/plugin/urlfilter-fast/ivy.xml b/src/plugin/urlfilter-fast/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-fast/ivy.xml +++ b/src/plugin/urlfilter-fast/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-ignoreexempt/README.md b/src/plugin/urlfilter-ignoreexempt/README.md index d48b6729f..a8f932e75 100644 --- a/src/plugin/urlfilter-ignoreexempt/README.md +++ b/src/plugin/urlfilter-ignoreexempt/README.md @@ -1,3 +1,20 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + urlfilter-ignoreexempt ====================== This plugin allows certain urls to be exempted when the external links are configured to be ignored. diff --git a/src/plugin/urlfilter-ignoreexempt/ivy.xml b/src/plugin/urlfilter-ignoreexempt/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-ignoreexempt/ivy.xml +++ b/src/plugin/urlfilter-ignoreexempt/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-prefix/ivy.xml b/src/plugin/urlfilter-prefix/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-prefix/ivy.xml +++ b/src/plugin/urlfilter-prefix/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-regex/ivy.xml b/src/plugin/urlfilter-regex/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-regex/ivy.xml +++ b/src/plugin/urlfilter-regex/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-suffix/ivy.xml b/src/plugin/urlfilter-suffix/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-suffix/ivy.xml +++ b/src/plugin/urlfilter-suffix/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlfilter-validator/ivy.xml b/src/plugin/urlfilter-validator/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlfilter-validator/ivy.xml +++ b/src/plugin/urlfilter-validator/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlmeta/ivy.xml b/src/plugin/urlmeta/ivy.xml index 1275664e5..cb5a0f186 100644 --- a/src/plugin/urlmeta/ivy.xml +++ b/src/plugin/urlmeta/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-ajax/ivy.xml b/src/plugin/urlnormalizer-ajax/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlnormalizer-ajax/ivy.xml +++ b/src/plugin/urlnormalizer-ajax/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-basic/ivy.xml b/src/plugin/urlnormalizer-basic/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlnormalizer-basic/ivy.xml +++ b/src/plugin/urlnormalizer-basic/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-host/data/hosts.txt b/src/plugin/urlnormalizer-host/data/hosts.txt index c7e0ccfe6..b81edae14 100644 --- a/src/plugin/urlnormalizer-host/data/hosts.txt +++ b/src/plugin/urlnormalizer-host/data/hosts.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Force all sub domains to www. *.example.com example.com diff --git a/src/plugin/urlnormalizer-host/ivy.xml b/src/plugin/urlnormalizer-host/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/urlnormalizer-host/ivy.xml +++ b/src/plugin/urlnormalizer-host/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-pass/ivy.xml b/src/plugin/urlnormalizer-pass/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlnormalizer-pass/ivy.xml +++ b/src/plugin/urlnormalizer-pass/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-protocol/data/protocols.txt b/src/plugin/urlnormalizer-protocol/data/protocols.txt index fc7d86cbd..159917252 100644 --- a/src/plugin/urlnormalizer-protocol/data/protocols.txt +++ b/src/plugin/urlnormalizer-protocol/data/protocols.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Example configuration file for urlnormalizer-protocol # # URL's of hosts listed in the configuration are normalized to the target diff --git a/src/plugin/urlnormalizer-protocol/ivy.xml b/src/plugin/urlnormalizer-protocol/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/urlnormalizer-protocol/ivy.xml +++ b/src/plugin/urlnormalizer-protocol/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-querystring/ivy.xml b/src/plugin/urlnormalizer-querystring/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/urlnormalizer-querystring/ivy.xml +++ b/src/plugin/urlnormalizer-querystring/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-regex/ivy.xml b/src/plugin/urlnormalizer-regex/ivy.xml index 956fd25ef..5c2c5b77e 100644 --- a/src/plugin/urlnormalizer-regex/ivy.xml +++ b/src/plugin/urlnormalizer-regex/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test b/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test index 7867ad80e..8560961c0 100644 --- a/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test +++ b/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # test simple removal of session id, keeping parameters before and after http://foo.com/foo.php?PHPSESSID=cdc993a493e899bed04f4d0c8a462a03 http://foo.com/foo.php http://foo.com/foo.php?f=2&PHPSESSID=cdc993a493e899bed04f4d0c8a462a03 http://foo.com/foo.php?f=2 diff --git a/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test b/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test index 9d928802e..9905e683d 100644 --- a/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test +++ b/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # test removal of subdomains http://www.foo.bar.com/ http://bar.com/ diff --git a/src/plugin/urlnormalizer-slash/data/slashes.txt b/src/plugin/urlnormalizer-slash/data/slashes.txt index d3bd70a66..efcdafb63 100644 --- a/src/plugin/urlnormalizer-slash/data/slashes.txt +++ b/src/plugin/urlnormalizer-slash/data/slashes.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Both domains have duplicate URL's, some with slashes and some without # We prefer this domain with slashes diff --git a/src/plugin/urlnormalizer-slash/ivy.xml b/src/plugin/urlnormalizer-slash/ivy.xml index 624dcaf4a..3d4fc905c 100644 --- a/src/plugin/urlnormalizer-slash/ivy.xml +++ b/src/plugin/urlnormalizer-slash/ivy.xml @@ -1,5 +1,4 @@ <?xml version="1.0" ?> - <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with diff --git a/src/test/crawl-tests.xml b/src/test/crawl-tests.xml index 01fc68301..b1e38ad3a 100644 --- a/src/test/crawl-tests.xml +++ b/src/test/crawl-tests.xml @@ -1,4 +1,20 @@ <?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!-- Configuration overrides used during unit tests. --> diff --git a/src/test/filter-all.txt b/src/test/filter-all.txt index 4ed567ab1..d738aec76 100644 --- a/src/test/filter-all.txt +++ b/src/test/filter-all.txt @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Config file for urlfilter-suffix plugin # Filter away all urls diff --git a/src/test/log4j.properties b/src/test/log4j.properties index 3ff115f46..08e272c71 100644 --- a/src/test/log4j.properties +++ b/src/test/log4j.properties @@ -1,3 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # log4j configuration used during build and unit tests log4j.rootLogger=info,stdout diff --git a/src/test/nutch-site.xml b/src/test/nutch-site.xml index dd408739d..0d6177e5e 100644 --- a/src/test/nutch-site.xml +++ b/src/test/nutch-site.xml @@ -1,4 +1,20 @@ <?xml version="1.0"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!-- Configuration overrides used during unit tests. --> diff --git a/src/testresources/fetch-test-site/dup_of_pagea.html b/src/testresources/fetch-test-site/dup_of_pagea.html index 6444c4122..63c4e6153 100644 --- a/src/testresources/fetch-test-site/dup_of_pagea.html +++ b/src/testresources/fetch-test-site/dup_of_pagea.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>page a</title> diff --git a/src/testresources/fetch-test-site/exception.html b/src/testresources/fetch-test-site/exception.html index e1192a176..66f134ee2 100644 --- a/src/testresources/fetch-test-site/exception.html +++ b/src/testresources/fetch-test-site/exception.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <HTML> <HEAD> diff --git a/src/testresources/fetch-test-site/index.html b/src/testresources/fetch-test-site/index.html index d73ff3f69..3fc6e61e5 100644 --- a/src/testresources/fetch-test-site/index.html +++ b/src/testresources/fetch-test-site/index.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>front page</title> diff --git a/src/testresources/fetch-test-site/nested_spider_trap.html b/src/testresources/fetch-test-site/nested_spider_trap.html index 5dcf7c220..dd32ee236 100644 --- a/src/testresources/fetch-test-site/nested_spider_trap.html +++ b/src/testresources/fetch-test-site/nested_spider_trap.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>nested spider trap</title> diff --git a/src/testresources/fetch-test-site/pagea.html b/src/testresources/fetch-test-site/pagea.html index 6444c4122..63c4e6153 100644 --- a/src/testresources/fetch-test-site/pagea.html +++ b/src/testresources/fetch-test-site/pagea.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>page a</title> diff --git a/src/testresources/fetch-test-site/pageb.html b/src/testresources/fetch-test-site/pageb.html index 66e3725ef..cf77ff4f7 100644 --- a/src/testresources/fetch-test-site/pageb.html +++ b/src/testresources/fetch-test-site/pageb.html @@ -1,3 +1,19 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> <html> <head> <title>bage b</title> diff --git a/src/testresources/fetch-test-site/robots.txt b/src/testresources/fetch-test-site/robots.txt index e69de29bb..fc590f973 100644 --- a/src/testresources/fetch-test-site/robots.txt +++ b/src/testresources/fetch-test-site/robots.txt @@ -0,0 +1,14 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. \ No newline at end of file