This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 792ed2891 NUTCH-3015 Add more CI steps to GitHub master-build.yml (#790)
792ed2891 is described below

commit 792ed28914f4beb2fb8b8ce28eebe17196c92af1
Author: Lewis John McGibbney <lewis.mcgibb...@gmail.com>
AuthorDate: Fri Oct 27 15:04:22 2023 -0700

    NUTCH-3015 Add more CI steps to GitHub master-build.yml (#790)
---
 .../{master-build.yml => dependency-check.yml}     | 25 ++++-----
 .github/workflows/master-build.yml                 | 64 +++++++++++++++++-----
 .gitignore                                         |  1 +
 build.xml                                          | 52 +++++++++++++++---
 .../dependency-check-suppressions.xml              |  5 --
 src/java/overview.html                             | 16 ++++++
 .../creativecommons/conf/crawl-urlfilter.txt       | 15 +++++
 src/plugin/creativecommons/conf/nutch-site.xml     | 16 ++++++
 src/plugin/creativecommons/data/anchor.html        | 16 ++++++
 src/plugin/creativecommons/data/rdf.html           | 16 ++++++
 src/plugin/creativecommons/data/rel.html           | 16 ++++++
 src/plugin/creativecommons/ivy.xml                 |  1 -
 src/plugin/exchange-jexl/README.md                 | 17 ++++++
 src/plugin/exchange-jexl/ivy.xml                   |  1 -
 src/plugin/feed/ivy.xml                            |  1 -
 src/plugin/headings/ivy.xml                        |  1 -
 src/plugin/index-anchor/ivy.xml                    |  1 -
 src/plugin/index-basic/ivy.xml                     |  1 -
 src/plugin/index-geoip/ivy.xml                     |  1 -
 src/plugin/index-geoip/plugin.xml                  |  1 +
 src/plugin/index-jexl-filter/ivy.xml               |  1 -
 src/plugin/index-links/README.md                   | 17 ++++++
 src/plugin/index-links/ivy.xml                     |  1 -
 src/plugin/index-metadata/ivy.xml                  |  1 -
 src/plugin/index-more/ivy.xml                      |  1 -
 src/plugin/index-replace/ivy.xml                   |  1 -
 .../index-replace/sample/testIndexReplace.html     | 16 ++++++
 src/plugin/index-static/ivy.xml                    |  1 -
 src/plugin/indexer-cloudsearch/README.md           | 17 ++++++
 src/plugin/indexer-cloudsearch/createCSDomain.sh   | 15 +++++
 src/plugin/indexer-csv/README.md                   | 17 ++++++
 src/plugin/indexer-csv/ivy.xml                     |  1 -
 src/plugin/indexer-dummy/README.md                 | 17 ++++++
 src/plugin/indexer-dummy/ivy.xml                   |  1 -
 src/plugin/indexer-elastic/README.md               | 17 ++++++
 .../{howto_upgrade_es.txt => howto_upgrade_es.md}  | 17 ++++++
 src/plugin/indexer-kafka/ivy.xml                   |  1 -
 src/plugin/indexer-opensearch-1x/README.md         | 17 ++++++
 ..._opensearch.txt => howto_upgrade_opensearch.md} | 17 ++++++
 src/plugin/indexer-rabbit/README.md                | 17 ++++++
 src/plugin/indexer-rabbit/ivy.xml                  |  1 -
 src/plugin/indexer-solr/README.md                  | 17 ++++++
 ...owto_upgrade_solr.txt => howto_upgrade_solr.md} | 17 ++++++
 src/plugin/indexer-solr/ivy.xml                    | 25 +++++----
 src/plugin/indexer-solr/plugin.xml                 | 26 +++++----
 src/plugin/language-identifier/ivy.xml             |  1 -
 src/plugin/lib-htmlunit/ivy.xml                    |  1 -
 src/plugin/lib-http/ivy.xml                        |  1 -
 src/plugin/lib-nekohtml/ivy.xml                    |  1 -
 src/plugin/lib-rabbitmq/ivy.xml                    |  1 -
 src/plugin/lib-regex-filter/ivy.xml                |  1 -
 src/plugin/lib-selenium/README.md                  | 17 ++++++
 .../howto_upgrade_selenium.md}                     | 42 +++++---------
 src/plugin/lib-selenium/howto_upgrade_selenium.txt | 15 -----
 src/plugin/lib-selenium/ivy.xml                    |  1 -
 src/plugin/lib-xml/ivy.xml                         |  1 -
 src/plugin/microformats-reltag/ivy.xml             |  1 -
 src/plugin/mimetype-filter/ivy.xml                 |  1 -
 src/plugin/nutch-extensionpoints/ivy.xml           |  1 -
 src/plugin/parse-ext/command                       | 15 +++++
 src/plugin/parse-ext/ivy.xml                       |  1 -
 src/plugin/parse-html/ivy.xml                      |  1 -
 src/plugin/parse-js/ivy.xml                        |  1 -
 .../parse-js/sample/parse_embedded_js_test.html    | 16 ++++++
 src/plugin/parse-js/sample/parse_pure_js_test.js   | 15 +++++
 src/plugin/parse-metatags/ivy.xml                  |  1 -
 src/plugin/parse-metatags/sample/testMetatags.html | 16 ++++++
 .../sample/testMultivalueMetatags.html             | 16 ++++++
 ...owto_upgrade_tika.txt => howto_upgrade_tika.md} | 17 ++++++
 src/plugin/parse-tika/ivy.xml                      |  1 -
 src/plugin/parse-tika/sample/nutch.html            | 16 ++++++
 src/plugin/parse-zip/ivy.xml                       |  1 -
 src/plugin/parsefilter-debug/ivy.xml               |  1 -
 src/plugin/parsefilter-naivebayes/ivy.xml          |  1 -
 .../parsefilter-regex/data/regex-parsefilter.txt   | 15 +++++
 src/plugin/parsefilter-regex/ivy.xml               |  1 -
 src/plugin/protocol-file/ivy.xml                   |  1 -
 .../protocol-file/sample/testprotocolfile.txt      | 15 +++++
 .../sample/testprotocolfile_(encoded).txt          | 15 +++++
 src/plugin/protocol-foo/ivy.xml                    |  1 -
 src/plugin/protocol-foo/plugin.xml                 |  1 -
 src/plugin/protocol-ftp/ivy.xml                    |  1 -
 src/plugin/protocol-htmlunit/ivy.xml               |  1 -
 src/plugin/protocol-http/ivy.xml                   |  1 -
 src/plugin/protocol-httpclient/ivy.xml             |  1 -
 src/plugin/protocol-interactiveselenium/README.md  | 17 ++++++
 src/plugin/protocol-interactiveselenium/ivy.xml    |  1 -
 ..._upgrade_okhttp.txt => howto_upgrade_okhttp.md} | 17 ++++++
 src/plugin/protocol-okhttp/ivy.xml                 |  1 -
 src/plugin/protocol-selenium/README.md             | 17 ++++++
 src/plugin/protocol-selenium/ivy.xml               |  1 -
 src/plugin/publish-rabbitmq/ivy.xml                |  1 -
 src/plugin/scoring-depth/ivy.xml                   |  1 -
 src/plugin/scoring-link/ivy.xml                    |  1 -
 src/plugin/scoring-metadata/ivy.xml                |  1 -
 src/plugin/scoring-opic/ivy.xml                    |  1 -
 src/plugin/scoring-orphan/ivy.xml                  |  1 -
 src/plugin/scoring-similarity/ivy.xml              |  1 -
 src/plugin/subcollection/ivy.xml                   |  1 -
 src/plugin/tld/ivy.xml                             |  1 -
 src/plugin/urlfilter-automaton/ivy.xml             |  1 -
 src/plugin/urlfilter-domain/data/hosts.txt         | 15 +++++
 src/plugin/urlfilter-domain/ivy.xml                |  1 -
 src/plugin/urlfilter-domaindenylist/data/hosts.txt | 15 +++++
 src/plugin/urlfilter-domaindenylist/ivy.xml        |  1 -
 src/plugin/urlfilter-fast/README.md                | 16 ++++++
 src/plugin/urlfilter-fast/ivy.xml                  |  1 -
 src/plugin/urlfilter-ignoreexempt/README.md        | 17 ++++++
 src/plugin/urlfilter-ignoreexempt/ivy.xml          |  1 -
 src/plugin/urlfilter-prefix/ivy.xml                |  1 -
 src/plugin/urlfilter-regex/ivy.xml                 |  1 -
 src/plugin/urlfilter-suffix/ivy.xml                |  1 -
 src/plugin/urlfilter-validator/ivy.xml             |  1 -
 src/plugin/urlmeta/ivy.xml                         |  1 -
 src/plugin/urlnormalizer-ajax/ivy.xml              |  1 -
 src/plugin/urlnormalizer-basic/ivy.xml             |  1 -
 src/plugin/urlnormalizer-host/data/hosts.txt       | 15 +++++
 src/plugin/urlnormalizer-host/ivy.xml              |  1 -
 src/plugin/urlnormalizer-pass/ivy.xml              |  1 -
 .../urlnormalizer-protocol/data/protocols.txt      | 15 +++++
 src/plugin/urlnormalizer-protocol/ivy.xml          |  1 -
 src/plugin/urlnormalizer-querystring/ivy.xml       |  1 -
 src/plugin/urlnormalizer-regex/ivy.xml             |  1 -
 .../sample/regex-normalize-default.test            | 15 +++++
 .../sample/regex-normalize-scope1.test             | 15 +++++
 src/plugin/urlnormalizer-slash/data/slashes.txt    | 15 +++++
 src/plugin/urlnormalizer-slash/ivy.xml             |  1 -
 src/test/crawl-tests.xml                           | 16 ++++++
 src/test/filter-all.txt                            | 15 +++++
 src/test/log4j.properties                          | 15 +++++
 src/test/nutch-site.xml                            | 16 ++++++
 .../fetch-test-site/dup_of_pagea.html              | 16 ++++++
 src/testresources/fetch-test-site/exception.html   | 16 ++++++
 src/testresources/fetch-test-site/index.html       | 16 ++++++
 .../fetch-test-site/nested_spider_trap.html        | 16 ++++++
 src/testresources/fetch-test-site/pagea.html       | 16 ++++++
 src/testresources/fetch-test-site/pageb.html       | 16 ++++++
 src/testresources/fetch-test-site/robots.txt       | 14 +++++
 138 files changed, 1017 insertions(+), 177 deletions(-)

diff --git a/.github/workflows/master-build.yml 
b/.github/workflows/dependency-check.yml
similarity index 72%
copy from .github/workflows/master-build.yml
copy to .github/workflows/dependency-check.yml
index ba1d470ec..f07f746a0 100644
--- a/.github/workflows/master-build.yml
+++ b/.github/workflows/dependency-check.yml
@@ -1,4 +1,3 @@
-#
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -13,28 +12,26 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 name: master pr build
 
 on:
-  push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
+  schedule:
+    - cron: '0 0 * * *'  # every day at midnight
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
+  dependency-check:
     strategy:
       matrix:
-        java: [ '11' ]
-
+        java: ['11']
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up JDK ${{ matrix.java }}
-        uses: actions/setup-java@v1
+        uses: actions/setup-java@v3
         with:
           java-version: ${{ matrix.java }}
-      - name: Build with Ant
-        run: ant clean nightly javadoc -buildfile build.xml
+          distribution: 'temurin'
+      - name: Dependency check
+        run: ant clean dependency-check -buildfile build.xml
diff --git a/.github/workflows/master-build.yml 
b/.github/workflows/master-build.yml
index ba1d470ec..e0af58df0 100644
--- a/.github/workflows/master-build.yml
+++ b/.github/workflows/master-build.yml
@@ -1,4 +1,3 @@
-#
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
@@ -13,28 +12,67 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
-name: master pr build
+name: master pull request ci
 
 on:
   push:
-    branches: [ master ]
+    branches: [master]
   pull_request:
-    branches: [ master ]
+    types: [opened, synchronize, reopened]
+    branches: [master]
 
 jobs:
-  build:
-    runs-on: ubuntu-latest
+  javadoc:
     strategy:
       matrix:
-        java: [ '11' ]
-
+        java: ['11']
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+      - name: Javadoc
+        run: ant clean javadoc -buildfile build.xml
+  rat:
+    strategy:
+      matrix:
+        java: ['11']
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+      - name: Run Apache Rat
+        run: ant clean run-rat -buildfile build.xml
+      - name: Cache unknown licenses
+        run: echo "UNKNOWN_LICENSES=$(sed -n 18p /home/runner/work/nutch/nutch/build/apache-rat-report.txt)" >> $GITHUB_ENV
+      - name: Versions
+        run: |
+          echo $UNKNOWN_LICENSES
+      - name: Fail if any unknown licenses
+        if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }}
+        run: exit 1
+  test:
+    strategy:
+      matrix:
+        java: ['11']
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up JDK ${{ matrix.java }}
-        uses: actions/setup-java@v1
+        uses: actions/setup-java@v3
         with:
           java-version: ${{ matrix.java }}
-      - name: Build with Ant
-        run: ant clean nightly javadoc -buildfile build.xml
+          distribution: 'temurin'
+      - name: Test
+        run: ant clean test -buildfile build.xml
diff --git a/.gitignore b/.gitignore
index b46690852..12365dd0d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,3 +28,4 @@ csvindexwriter
 lib/spotbugs-*
 ivy/dependency-check-ant/*
 .gradle*
+ivy/apache-rat-*
diff --git a/build.xml b/build.xml
index b44581405..dd9797302 100644
--- a/build.xml
+++ b/build.xml
@@ -38,7 +38,7 @@
   <property name="maven-javadoc-jar" 
value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
   <property name="maven-sources-jar" 
value="${release.dir}/${artifactId}-${version}-sources.jar" />
 
-  <property name="dependency-check-ant.version" value="7.1.1" />
+  <property name="dependency-check-ant.version" value="8.4.2" />
   <property name="dependency-check-ant.home" 
value="${ivy.dir}/dependency-check-ant" />
   <property name="dependency-check-ant.jar" 
value="${dependency-check-ant.home}/dependency-check-ant.jar" />
 
@@ -48,7 +48,7 @@
   <property name="spotbugs.home" 
value="${ivy.dir}/spotbugs-${spotbugs.version}" />
   <property name="spotbugs.jar" value="${spotbugs.home}/lib/spotbugs-ant.jar" 
/>
 
-  <property name="apache-rat.version" value="0.14" />
+  <property name="apache-rat.version" value="0.15" />
   <property name="apache-rat.home" 
value="${ivy.dir}/apache-rat-${apache-rat.version}" />
   <property name="apache-rat.jar" 
value="${apache-rat.home}/apache-rat-${apache-rat.version}.jar" />
 
@@ -640,13 +640,15 @@
     </fileset>
   </path>
 
-  <target name="report-vulnerabilities" depends="jar, compile-plugins, dependency-check-ant-download" description="--> check dependencies for security vulnerabilities">
+  <target name="dependency-check" depends="jar, compile-plugins, dependency-check-ant-download" description="--> check dependencies for security vulnerabilities">
     <taskdef resource="dependency-check-taskdefs.properties">
       <classpath refid="dependency-check-ant.path" />
     </taskdef>
     <dependency-check projectname="${name}"
                       reportoutputdirectory="${dependency-check-ant.home}"
-                      reportformat="ALL">
+                      reportformat="ALL"
+                      assemblyAnalyzerEnabled="false"
+                      failBuildOnCVSS="1">
         <suppressionfile 
path="${dependency-check-ant.home}/dependency-check-suppressions.xml" />
         <retirejsFilter regex="copyright.*jeremy long" />
         <fileset dir="${build.dir}">
@@ -1025,7 +1027,7 @@
 
   <target name="apache-rat-download-unchecked" unless="apache-rat.jar.found"
           description="--> downloads the Apache Rat jar">
-    <get src="https://www.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz"
+    <get src="https://archive.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz"
          dest="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" usetimestamp="false" />
 
     <untar src="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz"
@@ -1035,8 +1037,8 @@
     <delete file="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" />
   </target>
 
-  <target name="rat-sources" depends="init, apache-rat-download"
-    description="--> runs RAT tasks over src/java">
+  <target name="run-rat" depends="init, apache-rat-download"
+    description="--> runs Apache Rat on codebase">
     <taskdef
         uri="antlib:org.apache.rat.anttasks"
         resource="org/apache/rat/anttasks/antlib.xml">
@@ -1047,8 +1049,40 @@
     <rat:report
         reportFile="${build.dir}/apache-rat-report.txt">
       <fileset dir="src">
-        <include name="java/**/*"/>
-        <include name="plugin/**/src/**/*"/>
+        <include name="**"/>
+        <exclude 
name="plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/langmappings.properties"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/de.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/en.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/es.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fi.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fr.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/it.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/nl.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/pt.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/sv.test"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/test-referencial.txt"/>
+        <exclude 
name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/da.test"/>
+        <exclude name="plugin/parse-tika/sample/ootest.txt"/>
+        <exclude name="plugin/parse-tika/sample/test.rtf"/>
+        <exclude name="plugin/urlfilter-ignoreexempt/data/.donotdelete"/>
+        <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.rules"/>
+        <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.urls"/>
+        <exclude 
name="plugin/urlfilter-automaton/sample/IntranetCrawling.rules"/>
+        <exclude 
name="plugin/urlfilter-automaton/sample/IntranetCrawling.urls"/>
+        <exclude 
name="plugin/urlfilter-automaton/sample/WholeWebCrawling.rules"/>
+        <exclude 
name="plugin/urlfilter-automaton/sample/WholeWebCrawling.urls"/>
+        <exclude name="plugin/urlfilter-fast/sample/Benchmarks.urls"/>
+        <exclude 
name="plugin/urlfilter-fast/sample/fast-urlfilter-benchmark.txt"/>
+        <exclude name="plugin/urlfilter-fast/sample/fast-urlfilter-test.txt"/>
+        <exclude name="plugin/urlfilter-fast/sample/test.urls"/>
+        <exclude name="plugin/urlfilter-regex/sample/Benchmarks.rules"/>
+        <exclude name="plugin/urlfilter-regex/sample/Benchmarks.urls"/>
+        <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.rules"/>
+        <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.urls"/>
+        <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.rules"/>
+        <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.urls"/>
+        <exclude name="plugin/urlfilter-regex/sample/nutch1838.rules"/>
+        <exclude name="plugin/urlfilter-regex/sample/nutch1838.urls"/>
       </fileset>
     </rat:report>
   </target>
diff --git a/ivy/dependency-check-ant/dependency-check-suppressions.xml 
b/ivy/dependency-check-ant/dependency-check-suppressions.xml
index e7de8febb..a7f4ca16d 100644
--- a/ivy/dependency-check-ant/dependency-check-suppressions.xml
+++ b/ivy/dependency-check-ant/dependency-check-suppressions.xml
@@ -1,8 +1,3 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <suppressions xmlns="https://jeremylong.github.io/DependencyCheck/dependency-suppression.1.1.xsd">
-   <suppress>
-      <notes>only applies to tika-server &lt; 1.18</notes>
-      <gav regex="true">^org\.(apache\.tika:tika-(core|parsers)|gagravarr:vorbis-java-tika):.*$</gav>
-      <cve>CVE-2018-1335</cve>
-   </suppress>
 </suppressions>
diff --git a/src/java/overview.html b/src/java/overview.html
index 11321417b..3de53a7d2 100644
--- a/src/java/overview.html
+++ b/src/java/overview.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+ 
+    http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
 <head>
    <title>Apache Nutch</title>
diff --git a/src/plugin/creativecommons/conf/crawl-urlfilter.txt 
b/src/plugin/creativecommons/conf/crawl-urlfilter.txt
index 324617f07..eb6786e4b 100644
--- a/src/plugin/creativecommons/conf/crawl-urlfilter.txt
+++ b/src/plugin/creativecommons/conf/crawl-urlfilter.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Creative Commnons crawl filter
 
 # Each non-comment, non-blank line contains a regular expression
diff --git a/src/plugin/creativecommons/conf/nutch-site.xml 
b/src/plugin/creativecommons/conf/nutch-site.xml
index e28e12a9a..4b343b2cc 100644
--- a/src/plugin/creativecommons/conf/nutch-site.xml
+++ b/src/plugin/creativecommons/conf/nutch-site.xml
@@ -1,5 +1,21 @@
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 
 <!-- Creative Commons' Nutch configuration -->
 
diff --git a/src/plugin/creativecommons/data/anchor.html 
b/src/plugin/creativecommons/data/anchor.html
index 90b522759..3267bc9ea 100755
--- a/src/plugin/creativecommons/data/anchor.html
+++ b/src/plugin/creativecommons/data/anchor.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd">
 <html>
 <head>
diff --git a/src/plugin/creativecommons/data/rdf.html 
b/src/plugin/creativecommons/data/rdf.html
index fb2c34dfe..60c27cc54 100755
--- a/src/plugin/creativecommons/data/rdf.html
+++ b/src/plugin/creativecommons/data/rdf.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 <html>
  <head>
diff --git a/src/plugin/creativecommons/data/rel.html 
b/src/plugin/creativecommons/data/rel.html
index 413d52f86..3d11572d8 100755
--- a/src/plugin/creativecommons/data/rel.html
+++ b/src/plugin/creativecommons/data/rel.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head>
 </head><body>
diff --git a/src/plugin/creativecommons/ivy.xml 
b/src/plugin/creativecommons/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/creativecommons/ivy.xml
+++ b/src/plugin/creativecommons/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/exchange-jexl/README.md 
b/src/plugin/exchange-jexl/README.md
index 2d2024276..35a711b90 100644
--- a/src/plugin/exchange-jexl/README.md
+++ b/src/plugin/exchange-jexl/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 exchange-jexl plugin for Nutch  
 ==============================
 
diff --git a/src/plugin/exchange-jexl/ivy.xml b/src/plugin/exchange-jexl/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/exchange-jexl/ivy.xml
+++ b/src/plugin/exchange-jexl/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/feed/ivy.xml b/src/plugin/feed/ivy.xml
index 7e3f4ede3..a7671307b 100644
--- a/src/plugin/feed/ivy.xml
+++ b/src/plugin/feed/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/headings/ivy.xml b/src/plugin/headings/ivy.xml
index a8d6b9d48..63007f93c 100644
--- a/src/plugin/headings/ivy.xml
+++ b/src/plugin/headings/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-anchor/ivy.xml b/src/plugin/index-anchor/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/index-anchor/ivy.xml
+++ b/src/plugin/index-anchor/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-basic/ivy.xml b/src/plugin/index-basic/ivy.xml
index 673ea7f09..7bae19bb9 100644
--- a/src/plugin/index-basic/ivy.xml
+++ b/src/plugin/index-basic/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-geoip/ivy.xml b/src/plugin/index-geoip/ivy.xml
index 2eda5a63f..45a638819 100644
--- a/src/plugin/index-geoip/ivy.xml
+++ b/src/plugin/index-geoip/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-geoip/plugin.xml 
b/src/plugin/index-geoip/plugin.xml
index c4efadf94..dda1b6a7b 100644
--- a/src/plugin/index-geoip/plugin.xml
+++ b/src/plugin/index-geoip/plugin.xml
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
 <!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-jexl-filter/ivy.xml 
b/src/plugin/index-jexl-filter/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/index-jexl-filter/ivy.xml
+++ b/src/plugin/index-jexl-filter/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-links/README.md b/src/plugin/index-links/README.md
index f25d1cf6d..ac0f071f4 100644
--- a/src/plugin/index-links/README.md
+++ b/src/plugin/index-links/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-links plugin for Nutch
 ==============================
 
diff --git a/src/plugin/index-links/ivy.xml b/src/plugin/index-links/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/index-links/ivy.xml
+++ b/src/plugin/index-links/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-metadata/ivy.xml 
b/src/plugin/index-metadata/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/index-metadata/ivy.xml
+++ b/src/plugin/index-metadata/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-more/ivy.xml b/src/plugin/index-more/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/index-more/ivy.xml
+++ b/src/plugin/index-more/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-replace/ivy.xml b/src/plugin/index-replace/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/index-replace/ivy.xml
+++ b/src/plugin/index-replace/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/index-replace/sample/testIndexReplace.html 
b/src/plugin/index-replace/sample/testIndexReplace.html
index 0b90fc211..fb2ef03a5 100644
--- a/src/plugin/index-replace/sample/testIndexReplace.html
+++ b/src/plugin/index-replace/sample/testIndexReplace.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
   <head>
     <title>Testing the power of the index-replace plugin</title>
diff --git a/src/plugin/index-static/ivy.xml b/src/plugin/index-static/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/index-static/ivy.xml
+++ b/src/plugin/index-static/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/indexer-cloudsearch/README.md 
b/src/plugin/indexer-cloudsearch/README.md
index 10b5daa90..a0609c0fb 100644
--- a/src/plugin/indexer-cloudsearch/README.md
+++ b/src/plugin/indexer-cloudsearch/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 AWS CloudSearch plugin for Nutch 
 ================================
 
diff --git a/src/plugin/indexer-cloudsearch/createCSDomain.sh 
b/src/plugin/indexer-cloudsearch/createCSDomain.sh
index 24fb0156c..1cb8481fe 100644
--- a/src/plugin/indexer-cloudsearch/createCSDomain.sh
+++ b/src/plugin/indexer-cloudsearch/createCSDomain.sh
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # example of domain configuration for CloudSearch
 
 DOMAIN="$1"
diff --git a/src/plugin/indexer-csv/README.md b/src/plugin/indexer-csv/README.md
index 80220974a..4d1288b19 100644
--- a/src/plugin/indexer-csv/README.md
+++ b/src/plugin/indexer-csv/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-csv plugin for Nutch 
 ============================
 
diff --git a/src/plugin/indexer-csv/ivy.xml b/src/plugin/indexer-csv/ivy.xml
index 75b5d54e5..e7bf87546 100644
--- a/src/plugin/indexer-csv/ivy.xml
+++ b/src/plugin/indexer-csv/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/indexer-dummy/README.md 
b/src/plugin/indexer-dummy/README.md
index 2a4b2bd15..a7fa53009 100644
--- a/src/plugin/indexer-dummy/README.md
+++ b/src/plugin/indexer-dummy/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-dummy plugin for Nutch 
 ==============================
 
diff --git a/src/plugin/indexer-dummy/ivy.xml b/src/plugin/indexer-dummy/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/indexer-dummy/ivy.xml
+++ b/src/plugin/indexer-dummy/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/indexer-elastic/README.md 
b/src/plugin/indexer-elastic/README.md
index 466762e1c..3dfd888ff 100644
--- a/src/plugin/indexer-elastic/README.md
+++ b/src/plugin/indexer-elastic/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-elastic plugin for Nutch 
 ================================
 
diff --git a/src/plugin/indexer-elastic/howto_upgrade_es.txt 
b/src/plugin/indexer-elastic/howto_upgrade_es.md
similarity index 60%
rename from src/plugin/indexer-elastic/howto_upgrade_es.txt
rename to src/plugin/indexer-elastic/howto_upgrade_es.md
index a8156444c..b57e0c02f 100644
--- a/src/plugin/indexer-elastic/howto_upgrade_es.txt
+++ b/src/plugin/indexer-elastic/howto_upgrade_es.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 1. Upgrade Elasticsearch dependency in src/plugin/indexer-elastic/ivy.xml
 
 2. Upgrade the Elasticsearch specific dependencies in src/plugin/indexer-elastic/plugin.xml
diff --git a/src/plugin/indexer-kafka/ivy.xml b/src/plugin/indexer-kafka/ivy.xml
index 7bdd94324..9d605c50b 100644
--- a/src/plugin/indexer-kafka/ivy.xml
+++ b/src/plugin/indexer-kafka/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/indexer-opensearch-1x/README.md 
b/src/plugin/indexer-opensearch-1x/README.md
index 52e5844af..e5e76f0b6 100644
--- a/src/plugin/indexer-opensearch-1x/README.md
+++ b/src/plugin/indexer-opensearch-1x/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-opensearch1x plugin for Nutch 
 ================================
 
diff --git a/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt 
b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md
similarity index 60%
rename from src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt
rename to src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md
index 072590044..c9b723ffc 100644
--- a/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.txt
+++ b/src/plugin/indexer-opensearch-1x/howto_upgrade_opensearch.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 1. Upgrade OpenSearch dependency in src/plugin/indexer-opensearch-1x/ivy.xml
 
 2. Upgrade the OpenSearch specific dependencies in src/plugin/indexer-opensearch-1x/plugin.xml
diff --git a/src/plugin/indexer-rabbit/README.md 
b/src/plugin/indexer-rabbit/README.md
index 6ea09a915..8040cd6c7 100644
--- a/src/plugin/indexer-rabbit/README.md
+++ b/src/plugin/indexer-rabbit/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-rabbit plugin for Nutch
 ===============================
 
diff --git a/src/plugin/indexer-rabbit/ivy.xml 
b/src/plugin/indexer-rabbit/ivy.xml
index dd450cf7f..d2daf91da 100644
--- a/src/plugin/indexer-rabbit/ivy.xml
+++ b/src/plugin/indexer-rabbit/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/indexer-solr/README.md 
b/src/plugin/indexer-solr/README.md
index c3a4601e1..3a27e4116 100644
--- a/src/plugin/indexer-solr/README.md
+++ b/src/plugin/indexer-solr/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 indexer-solr plugin for Nutch 
 =============================
 
diff --git a/src/plugin/indexer-solr/howto_upgrade_solr.txt 
b/src/plugin/indexer-solr/howto_upgrade_solr.md
similarity index 58%
rename from src/plugin/indexer-solr/howto_upgrade_solr.txt
rename to src/plugin/indexer-solr/howto_upgrade_solr.md
index b2a7eb5c8..905fb84a9 100644
--- a/src/plugin/indexer-solr/howto_upgrade_solr.txt
+++ b/src/plugin/indexer-solr/howto_upgrade_solr.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 1. Upgrade Solr dependency in src/plugin/indexer-solr/ivy.xml
 
 2. Upgrade the Solr specific dependencies in src/plugin/indexer-solr/plugin.xml
diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml
index ce59942da..ab5fd72c7 100644
--- a/src/plugin/indexer-solr/ivy.xml
+++ b/src/plugin/indexer-solr/ivy.xml
@@ -1,15 +1,20 @@
 <?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
 
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-       license agreements. See the NOTICE file distributed with this work for additional 
-       information regarding copyright ownership. The ASF licenses this file to 
-       You under the Apache License, Version 2.0 (the "License"); you may not use 
-       this file except in compliance with the License. You may obtain a copy of 
-       the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-       by applicable law or agreed to in writing, software distributed under the 
-       License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-       OF ANY KIND, either express or implied. See the License for the specific 
-       language governing permissions and limitations under the License. -->
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 
 <ivy-module version="1.0">
        <info organisation="org.apache.nutch" module="${ant.project.name}">
diff --git a/src/plugin/indexer-solr/plugin.xml 
b/src/plugin/indexer-solr/plugin.xml
index f672ac9ed..21cc7d8bd 100644
--- a/src/plugin/indexer-solr/plugin.xml
+++ b/src/plugin/indexer-solr/plugin.xml
@@ -1,14 +1,20 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor 
-       license agreements. See the NOTICE file distributed with this work for additional 
-       information regarding copyright ownership. The ASF licenses this file to 
-       You under the Apache License, Version 2.0 (the "License"); you may not use 
-       this file except in compliance with the License. You may obtain a copy of 
-       the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required 
-       by applicable law or agreed to in writing, software distributed under the 
-       License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS 
-       OF ANY KIND, either express or implied. See the License for the specific 
-       language governing permissions and limitations under the License. -->
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <plugin id="indexer-solr" name="SolrIndexWriter" version="1.0.0"
        provider-name="nutch.apache.org">
 
diff --git a/src/plugin/language-identifier/ivy.xml 
b/src/plugin/language-identifier/ivy.xml
index 68e9ed76e..f64b97055 100644
--- a/src/plugin/language-identifier/ivy.xml
+++ b/src/plugin/language-identifier/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-htmlunit/ivy.xml b/src/plugin/lib-htmlunit/ivy.xml
index b03211667..795e6b335 100644
--- a/src/plugin/lib-htmlunit/ivy.xml
+++ b/src/plugin/lib-htmlunit/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-http/ivy.xml b/src/plugin/lib-http/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/lib-http/ivy.xml
+++ b/src/plugin/lib-http/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-nekohtml/ivy.xml b/src/plugin/lib-nekohtml/ivy.xml
index 072fb05b9..32fcd8c4b 100644
--- a/src/plugin/lib-nekohtml/ivy.xml
+++ b/src/plugin/lib-nekohtml/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-rabbitmq/ivy.xml b/src/plugin/lib-rabbitmq/ivy.xml
index 1b6ceac37..8184530af 100644
--- a/src/plugin/lib-rabbitmq/ivy.xml
+++ b/src/plugin/lib-rabbitmq/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-regex-filter/ivy.xml 
b/src/plugin/lib-regex-filter/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/lib-regex-filter/ivy.xml
+++ b/src/plugin/lib-regex-filter/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-selenium/README.md 
b/src/plugin/lib-selenium/README.md
index 1c6b37c5f..5054d7ad8 100644
--- a/src/plugin/lib-selenium/README.md
+++ b/src/plugin/lib-selenium/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 # Updates
 * The use of phantomjs has been deprecated. Check [Wikipedia](https://en.wikipedia.org/wiki/PhantomJS) for more info.
 * The updated code for Safari webriver is under development as starting Safari 10 on OS X El Capitan and macOS Sierra, Safari comes bundled with a new driver implementation.
diff --git a/src/plugin/index-geoip/plugin.xml 
b/src/plugin/lib-selenium/howto_upgrade_selenium.md
similarity index 52%
copy from src/plugin/index-geoip/plugin.xml
copy to src/plugin/lib-selenium/howto_upgrade_selenium.md
index c4efadf94..3071c74cb 100644
--- a/src/plugin/index-geoip/plugin.xml
+++ b/src/plugin/lib-selenium/howto_upgrade_selenium.md
@@ -14,31 +14,19 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 -->
-<plugin
-   id="index-geoip"
-   name="GeoIP2 Indexing Filter"
-   version="1.0.0"
-   provider-name="nutch.org">
-
-
-   <runtime>
-      <library name="index-geoip.jar">
-         <export name="*"/>
-      </library>
-      <library name="geoip2-3.0.1.jar"/>
-      <library name="maxmind-db-2.0.0.jar"/>
-   </runtime>
-
-   <requires>
-      <import plugin="nutch-extensionpoints"/>
-   </requires>
-
-   <extension id="org.apache.nutch.indexer.geoip"
-              name="Nutch GeoIP2 Indexing Filter"
-              point="org.apache.nutch.indexer.IndexingFilter">
-      <implementation id="GeoIPIndexingFilter"
-                      class="org.apache.nutch.indexer.geoip.GeoIPIndexingFilter"/>
-   </extension>
-
-</plugin>
 
+1. Upgrade various driver versions dependency in src/plugin/lib-selenium/ivy.xml
+
+2. Upgrade Selenium's own dependencies in src/plugin/lib-selenium/plugin.xml
+
+   To get a list of dependencies and their versions execute:
+    $ ant -f ./build-ivy.xml
+    $ ls lib | sed 's/^/     <library name="/g' | sed 's/$/">\n       <export name="*"\/>\n     <\/library>/g'
+
+   Note that all dependent libraries are exported for a "library" plugin ("lib-selenium").
+
+   N.B. The above Regex + Sed commands may not work if you are using MacOSX's Sed. In this instance you can install GNU Sed as follows
+
+   $ brew install gnu-sed --with-default-names
+
+   You can then restart your terminal and the Regex + Sed command should work just fine!
diff --git a/src/plugin/lib-selenium/howto_upgrade_selenium.txt 
b/src/plugin/lib-selenium/howto_upgrade_selenium.txt
deleted file mode 100644
index 1892a6275..000000000
--- a/src/plugin/lib-selenium/howto_upgrade_selenium.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-1. Upgrade various driver versions dependency in src/plugin/lib-selenium/ivy.xml
-
-2. Upgrade Selenium's own dependencies in src/plugin/lib-selenium/plugin.xml
-
-   To get a list of dependencies and their versions execute:
-    $ ant -f ./build-ivy.xml
-    $ ls lib | sed 's/^/     <library name="/g' | sed 's/$/">\n       <export name="*"\/>\n     <\/library>/g'
-
-   Note that all dependent libraries are exported for a "library" plugin ("lib-selenium").
-
-   N.B. The above Regex + Sed commands may not work if you are using MacOSX's Sed. In this instance you can instal GNU Sed as follows
-
-   $ brew install gnu-sed --with-default-names
-
-   You can then restart your terminal and the Regex + Sed command should work just fine!
diff --git a/src/plugin/lib-selenium/ivy.xml b/src/plugin/lib-selenium/ivy.xml
index 7d3a2d624..0d460cdb4 100644
--- a/src/plugin/lib-selenium/ivy.xml
+++ b/src/plugin/lib-selenium/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/lib-xml/ivy.xml b/src/plugin/lib-xml/ivy.xml
index 9306c4d9b..4e38c4371 100644
--- a/src/plugin/lib-xml/ivy.xml
+++ b/src/plugin/lib-xml/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/microformats-reltag/ivy.xml 
b/src/plugin/microformats-reltag/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/microformats-reltag/ivy.xml
+++ b/src/plugin/microformats-reltag/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/mimetype-filter/ivy.xml 
b/src/plugin/mimetype-filter/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/mimetype-filter/ivy.xml
+++ b/src/plugin/mimetype-filter/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/nutch-extensionpoints/ivy.xml 
b/src/plugin/nutch-extensionpoints/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/nutch-extensionpoints/ivy.xml
+++ b/src/plugin/nutch-extensionpoints/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-ext/command b/src/plugin/parse-ext/command
index f42c05531..329d58d96 100644
--- a/src/plugin/parse-ext/command
+++ b/src/plugin/parse-ext/command
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 #!/bin/bash
 #
 # Sample bash script as external command invoked by parse-ext plugin
diff --git a/src/plugin/parse-ext/ivy.xml b/src/plugin/parse-ext/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/parse-ext/ivy.xml
+++ b/src/plugin/parse-ext/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-html/ivy.xml b/src/plugin/parse-html/ivy.xml
index 69aa2eba5..1424c4d7a 100644
--- a/src/plugin/parse-html/ivy.xml
+++ b/src/plugin/parse-html/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-js/ivy.xml b/src/plugin/parse-js/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/parse-js/ivy.xml
+++ b/src/plugin/parse-js/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-js/sample/parse_embedded_js_test.html 
b/src/plugin/parse-js/sample/parse_embedded_js_test.html
index 351beacc3..0409bba53 100644
--- a/src/plugin/parse-js/sample/parse_embedded_js_test.html
+++ b/src/plugin/parse-js/sample/parse_embedded_js_test.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html style="font-size: 16px;"><head>
 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
diff --git a/src/plugin/parse-js/sample/parse_pure_js_test.js 
b/src/plugin/parse-js/sample/parse_pure_js_test.js
index f196313f8..0e486a879 100644
--- a/src/plugin/parse-js/sample/parse_pure_js_test.js
+++ b/src/plugin/parse-js/sample/parse_pure_js_test.js
@@ -1,3 +1,18 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 // test data for link extraction from "pure" JavaScript
 
 function selectProvider(form) {
diff --git a/src/plugin/parse-metatags/ivy.xml 
b/src/plugin/parse-metatags/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/parse-metatags/ivy.xml
+++ b/src/plugin/parse-metatags/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-metatags/sample/testMetatags.html 
b/src/plugin/parse-metatags/sample/testMetatags.html
index e9e8e6bd0..4dc86c194 100644
--- a/src/plugin/parse-metatags/sample/testMetatags.html
+++ b/src/plugin/parse-metatags/sample/testMetatags.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
 <head>
 <meta name="Keywords" content="This is a test of keywords" />
diff --git a/src/plugin/parse-metatags/sample/testMultivalueMetatags.html 
b/src/plugin/parse-metatags/sample/testMultivalueMetatags.html
index ca8b737c2..36d2c8814 100644
--- a/src/plugin/parse-metatags/sample/testMultivalueMetatags.html
+++ b/src/plugin/parse-metatags/sample/testMultivalueMetatags.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
 <head>
 <meta name="DC.creator" content="Doug Cutting">
diff --git a/src/plugin/parse-tika/howto_upgrade_tika.txt 
b/src/plugin/parse-tika/howto_upgrade_tika.md
similarity index 73%
rename from src/plugin/parse-tika/howto_upgrade_tika.txt
rename to src/plugin/parse-tika/howto_upgrade_tika.md
index 46d075948..8ed6c3f3c 100644
--- a/src/plugin/parse-tika/howto_upgrade_tika.txt
+++ b/src/plugin/parse-tika/howto_upgrade_tika.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 We are currently using a shim (https://github.com/tballison/hadoop-safe-tika)
 because of binary conflicts in commons-io versions between what Hadoop supports and the more
 modern features that Apache Tika and Apache POI were using in commons-io.
diff --git a/src/plugin/parse-tika/ivy.xml b/src/plugin/parse-tika/ivy.xml
index 1586d9661..b89e812e1 100644
--- a/src/plugin/parse-tika/ivy.xml
+++ b/src/plugin/parse-tika/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parse-tika/sample/nutch.html 
b/src/plugin/parse-tika/sample/nutch.html
index 0aa7c9895..809853512 100644
--- a/src/plugin/parse-tika/sample/nutch.html
+++ b/src/plugin/parse-tika/sample/nutch.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
 <html>
 <head>
diff --git a/src/plugin/parse-zip/ivy.xml b/src/plugin/parse-zip/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/parse-zip/ivy.xml
+++ b/src/plugin/parse-zip/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parsefilter-debug/ivy.xml 
b/src/plugin/parsefilter-debug/ivy.xml
index dac80e6d7..82f93c012 100644
--- a/src/plugin/parsefilter-debug/ivy.xml
+++ b/src/plugin/parsefilter-debug/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parsefilter-naivebayes/ivy.xml 
b/src/plugin/parsefilter-naivebayes/ivy.xml
index c261adac6..66a931543 100644
--- a/src/plugin/parsefilter-naivebayes/ivy.xml
+++ b/src/plugin/parsefilter-naivebayes/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/parsefilter-regex/data/regex-parsefilter.txt 
b/src/plugin/parsefilter-regex/data/regex-parsefilter.txt
index 9d15cd899..fbc7dd303 100644
--- a/src/plugin/parsefilter-regex/data/regex-parsefilter.txt
+++ b/src/plugin/parsefilter-regex/data/regex-parsefilter.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Example configuration file for parsefilter-regex
 #
 # Parse metadata field <name> is set to true if the HTML matches the regex. The
diff --git a/src/plugin/parsefilter-regex/ivy.xml 
b/src/plugin/parsefilter-regex/ivy.xml
index e82f92861..f33a31178 100644
--- a/src/plugin/parsefilter-regex/ivy.xml
+++ b/src/plugin/parsefilter-regex/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-file/ivy.xml b/src/plugin/protocol-file/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/protocol-file/ivy.xml
+++ b/src/plugin/protocol-file/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-file/sample/testprotocolfile.txt 
b/src/plugin/protocol-file/sample/testprotocolfile.txt
index fbe8a8acf..5e684e2f4 100644
--- a/src/plugin/protocol-file/sample/testprotocolfile.txt
+++ b/src/plugin/protocol-file/sample/testprotocolfile.txt
@@ -1 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 Protocol File Test
diff --git a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt 
b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
index fbe8a8acf..5e684e2f4 100644
--- a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
+++ b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
@@ -1 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 Protocol File Test
diff --git a/src/plugin/protocol-foo/ivy.xml b/src/plugin/protocol-foo/ivy.xml
index 1a86d6803..99b173446 100755
--- a/src/plugin/protocol-foo/ivy.xml
+++ b/src/plugin/protocol-foo/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-foo/plugin.xml 
b/src/plugin/protocol-foo/plugin.xml
index d34f6242a..954a2d41a 100755
--- a/src/plugin/protocol-foo/plugin.xml
+++ b/src/plugin/protocol-foo/plugin.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" encoding="UTF-8"?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-ftp/ivy.xml b/src/plugin/protocol-ftp/ivy.xml
index 8e1c257d6..1fbfe97f0 100644
--- a/src/plugin/protocol-ftp/ivy.xml
+++ b/src/plugin/protocol-ftp/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-htmlunit/ivy.xml 
b/src/plugin/protocol-htmlunit/ivy.xml
index dde1fe88f..fa787376b 100644
--- a/src/plugin/protocol-htmlunit/ivy.xml
+++ b/src/plugin/protocol-htmlunit/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-http/ivy.xml b/src/plugin/protocol-http/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/protocol-http/ivy.xml
+++ b/src/plugin/protocol-http/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-httpclient/ivy.xml 
b/src/plugin/protocol-httpclient/ivy.xml
index 378bd7c42..e3e515dd9 100644
--- a/src/plugin/protocol-httpclient/ivy.xml
+++ b/src/plugin/protocol-httpclient/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-interactiveselenium/README.md 
b/src/plugin/protocol-interactiveselenium/README.md
index dd43ee794..545efb830 100644
--- a/src/plugin/protocol-interactiveselenium/README.md
+++ b/src/plugin/protocol-interactiveselenium/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 Nutch Interactive Selenium
 ==========================
 
diff --git a/src/plugin/protocol-interactiveselenium/ivy.xml 
b/src/plugin/protocol-interactiveselenium/ivy.xml
index 506be0aec..112483bcd 100644
--- a/src/plugin/protocol-interactiveselenium/ivy.xml
+++ b/src/plugin/protocol-interactiveselenium/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt 
b/src/plugin/protocol-okhttp/howto_upgrade_okhttp.md
similarity index 52%
rename from src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt
rename to src/plugin/protocol-okhttp/howto_upgrade_okhttp.md
index b3b6f1f22..16ae70d71 100644
--- a/src/plugin/protocol-okhttp/howto_upgrade_okhttp.txt
+++ b/src/plugin/protocol-okhttp/howto_upgrade_okhttp.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 1. Upgrade OkHttp dependency in src/plugin/protocol-okhttp/ivy.xml
 
 2. Upgrade OkHttp's own dependencies in src/plugin/protocol-okhttp/plugin.xml
diff --git a/src/plugin/protocol-okhttp/ivy.xml 
b/src/plugin/protocol-okhttp/ivy.xml
index ead823247..73b4fa636 100644
--- a/src/plugin/protocol-okhttp/ivy.xml
+++ b/src/plugin/protocol-okhttp/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/protocol-selenium/README.md 
b/src/plugin/protocol-selenium/README.md
index 05132b9ef..4d43c330d 100644
--- a/src/plugin/protocol-selenium/README.md
+++ b/src/plugin/protocol-selenium/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 Nutch Selenium
 ==============
 
diff --git a/src/plugin/protocol-selenium/ivy.xml 
b/src/plugin/protocol-selenium/ivy.xml
index 506be0aec..112483bcd 100644
--- a/src/plugin/protocol-selenium/ivy.xml
+++ b/src/plugin/protocol-selenium/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/publish-rabbitmq/ivy.xml 
b/src/plugin/publish-rabbitmq/ivy.xml
index 7b5e3dd3c..008cdb1ca 100644
--- a/src/plugin/publish-rabbitmq/ivy.xml
+++ b/src/plugin/publish-rabbitmq/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-depth/ivy.xml b/src/plugin/scoring-depth/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/scoring-depth/ivy.xml
+++ b/src/plugin/scoring-depth/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-link/ivy.xml b/src/plugin/scoring-link/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/scoring-link/ivy.xml
+++ b/src/plugin/scoring-link/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-metadata/ivy.xml 
b/src/plugin/scoring-metadata/ivy.xml
index 24d76063d..6fa1a2c06 100644
--- a/src/plugin/scoring-metadata/ivy.xml
+++ b/src/plugin/scoring-metadata/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-opic/ivy.xml b/src/plugin/scoring-opic/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/scoring-opic/ivy.xml
+++ b/src/plugin/scoring-opic/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-orphan/ivy.xml 
b/src/plugin/scoring-orphan/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/scoring-orphan/ivy.xml
+++ b/src/plugin/scoring-orphan/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/scoring-similarity/ivy.xml 
b/src/plugin/scoring-similarity/ivy.xml
index 1acd1d442..1a1945f57 100644
--- a/src/plugin/scoring-similarity/ivy.xml
+++ b/src/plugin/scoring-similarity/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/subcollection/ivy.xml b/src/plugin/subcollection/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/subcollection/ivy.xml
+++ b/src/plugin/subcollection/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/tld/ivy.xml b/src/plugin/tld/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/tld/ivy.xml
+++ b/src/plugin/tld/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-automaton/ivy.xml 
b/src/plugin/urlfilter-automaton/ivy.xml
index 6b07ba33b..e9b1e892f 100644
--- a/src/plugin/urlfilter-automaton/ivy.xml
+++ b/src/plugin/urlfilter-automaton/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-domain/data/hosts.txt 
b/src/plugin/urlfilter-domain/data/hosts.txt
index 2b88c3b05..8cf43745f 100644
--- a/src/plugin/urlfilter-domain/data/hosts.txt
+++ b/src/plugin/urlfilter-domain/data/hosts.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # comments start with the pound sign
 net
 apache.org
diff --git a/src/plugin/urlfilter-domain/ivy.xml 
b/src/plugin/urlfilter-domain/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-domain/ivy.xml
+++ b/src/plugin/urlfilter-domain/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-domaindenylist/data/hosts.txt 
b/src/plugin/urlfilter-domaindenylist/data/hosts.txt
index 2b88c3b05..8cf43745f 100644
--- a/src/plugin/urlfilter-domaindenylist/data/hosts.txt
+++ b/src/plugin/urlfilter-domaindenylist/data/hosts.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # comments start with the pound sign
 net
 apache.org
diff --git a/src/plugin/urlfilter-domaindenylist/ivy.xml 
b/src/plugin/urlfilter-domaindenylist/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/urlfilter-domaindenylist/ivy.xml
+++ b/src/plugin/urlfilter-domaindenylist/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-fast/README.md 
b/src/plugin/urlfilter-fast/README.md
index 46b293fe8..2e5860575 100644
--- a/src/plugin/urlfilter-fast/README.md
+++ b/src/plugin/urlfilter-fast/README.md
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 
 Filters URLs based on a file of regular expressions using host/domains
 matching first. The default policy is to accept a URL if no matches
diff --git a/src/plugin/urlfilter-fast/ivy.xml 
b/src/plugin/urlfilter-fast/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-fast/ivy.xml
+++ b/src/plugin/urlfilter-fast/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-ignoreexempt/README.md 
b/src/plugin/urlfilter-ignoreexempt/README.md
index d48b6729f..a8f932e75 100644
--- a/src/plugin/urlfilter-ignoreexempt/README.md
+++ b/src/plugin/urlfilter-ignoreexempt/README.md
@@ -1,3 +1,20 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
 urlfilter-ignoreexempt
 ======================
   This plugin allows certain urls to be exempted when the external links are configured to be ignored.
diff --git a/src/plugin/urlfilter-ignoreexempt/ivy.xml b/src/plugin/urlfilter-ignoreexempt/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-ignoreexempt/ivy.xml
+++ b/src/plugin/urlfilter-ignoreexempt/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-prefix/ivy.xml b/src/plugin/urlfilter-prefix/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-prefix/ivy.xml
+++ b/src/plugin/urlfilter-prefix/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-regex/ivy.xml b/src/plugin/urlfilter-regex/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-regex/ivy.xml
+++ b/src/plugin/urlfilter-regex/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-suffix/ivy.xml b/src/plugin/urlfilter-suffix/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-suffix/ivy.xml
+++ b/src/plugin/urlfilter-suffix/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlfilter-validator/ivy.xml b/src/plugin/urlfilter-validator/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlfilter-validator/ivy.xml
+++ b/src/plugin/urlfilter-validator/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlmeta/ivy.xml b/src/plugin/urlmeta/ivy.xml
index 1275664e5..cb5a0f186 100644
--- a/src/plugin/urlmeta/ivy.xml
+++ b/src/plugin/urlmeta/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-ajax/ivy.xml b/src/plugin/urlnormalizer-ajax/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlnormalizer-ajax/ivy.xml
+++ b/src/plugin/urlnormalizer-ajax/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-basic/ivy.xml b/src/plugin/urlnormalizer-basic/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlnormalizer-basic/ivy.xml
+++ b/src/plugin/urlnormalizer-basic/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-host/data/hosts.txt b/src/plugin/urlnormalizer-host/data/hosts.txt
index c7e0ccfe6..b81edae14 100644
--- a/src/plugin/urlnormalizer-host/data/hosts.txt
+++ b/src/plugin/urlnormalizer-host/data/hosts.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Force all sub domains to www.
 *.example.com example.com
 
diff --git a/src/plugin/urlnormalizer-host/ivy.xml b/src/plugin/urlnormalizer-host/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/urlnormalizer-host/ivy.xml
+++ b/src/plugin/urlnormalizer-host/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-pass/ivy.xml b/src/plugin/urlnormalizer-pass/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlnormalizer-pass/ivy.xml
+++ b/src/plugin/urlnormalizer-pass/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-protocol/data/protocols.txt b/src/plugin/urlnormalizer-protocol/data/protocols.txt
index fc7d86cbd..159917252 100644
--- a/src/plugin/urlnormalizer-protocol/data/protocols.txt
+++ b/src/plugin/urlnormalizer-protocol/data/protocols.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Example configuration file for urlnormalizer-protocol
 #
 # URL's of hosts listed in the configuration are normalized to the target
diff --git a/src/plugin/urlnormalizer-protocol/ivy.xml b/src/plugin/urlnormalizer-protocol/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/urlnormalizer-protocol/ivy.xml
+++ b/src/plugin/urlnormalizer-protocol/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-querystring/ivy.xml b/src/plugin/urlnormalizer-querystring/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/urlnormalizer-querystring/ivy.xml
+++ b/src/plugin/urlnormalizer-querystring/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-regex/ivy.xml b/src/plugin/urlnormalizer-regex/ivy.xml
index 956fd25ef..5c2c5b77e 100644
--- a/src/plugin/urlnormalizer-regex/ivy.xml
+++ b/src/plugin/urlnormalizer-regex/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test b/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test
index 7867ad80e..8560961c0 100644
--- a/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test
+++ b/src/plugin/urlnormalizer-regex/sample/regex-normalize-default.test
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # test simple removal of session id, keeping parameters before and after
 http://foo.com/foo.php?PHPSESSID=cdc993a493e899bed04f4d0c8a462a03 http://foo.com/foo.php
 http://foo.com/foo.php?f=2&PHPSESSID=cdc993a493e899bed04f4d0c8a462a03 http://foo.com/foo.php?f=2
diff --git a/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test b/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test
index 9d928802e..9905e683d 100644
--- a/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test
+++ b/src/plugin/urlnormalizer-regex/sample/regex-normalize-scope1.test
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # test removal of subdomains
 http://www.foo.bar.com/ http://bar.com/
 
diff --git a/src/plugin/urlnormalizer-slash/data/slashes.txt b/src/plugin/urlnormalizer-slash/data/slashes.txt
index d3bd70a66..efcdafb63 100644
--- a/src/plugin/urlnormalizer-slash/data/slashes.txt
+++ b/src/plugin/urlnormalizer-slash/data/slashes.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Both domains have duplicate URL's, some with slashes and some without
 
 # We prefer this domain with slashes
diff --git a/src/plugin/urlnormalizer-slash/ivy.xml b/src/plugin/urlnormalizer-slash/ivy.xml
index 624dcaf4a..3d4fc905c 100644
--- a/src/plugin/urlnormalizer-slash/ivy.xml
+++ b/src/plugin/urlnormalizer-slash/ivy.xml
@@ -1,5 +1,4 @@
 <?xml version="1.0" ?>
-
 <!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements.  See the NOTICE file distributed with
diff --git a/src/test/crawl-tests.xml b/src/test/crawl-tests.xml
index 01fc68301..b1e38ad3a 100644
--- a/src/test/crawl-tests.xml
+++ b/src/test/crawl-tests.xml
@@ -1,4 +1,20 @@
 <?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 
 <!-- Configuration overrides used during unit tests. -->
 
diff --git a/src/test/filter-all.txt b/src/test/filter-all.txt
index 4ed567ab1..d738aec76 100644
--- a/src/test/filter-all.txt
+++ b/src/test/filter-all.txt
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Config file for urlfilter-suffix plugin
 # Filter away all urls
 
diff --git a/src/test/log4j.properties b/src/test/log4j.properties
index 3ff115f46..08e272c71 100644
--- a/src/test/log4j.properties
+++ b/src/test/log4j.properties
@@ -1,3 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # log4j configuration used during build and unit tests
 
 log4j.rootLogger=info,stdout
diff --git a/src/test/nutch-site.xml b/src/test/nutch-site.xml
index dd408739d..0d6177e5e 100644
--- a/src/test/nutch-site.xml
+++ b/src/test/nutch-site.xml
@@ -1,4 +1,20 @@
 <?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 
 <!-- Configuration overrides used during unit tests. -->
 
diff --git a/src/testresources/fetch-test-site/dup_of_pagea.html b/src/testresources/fetch-test-site/dup_of_pagea.html
index 6444c4122..63c4e6153 100644
--- a/src/testresources/fetch-test-site/dup_of_pagea.html
+++ b/src/testresources/fetch-test-site/dup_of_pagea.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
  <head>
   <title>page a</title>
diff --git a/src/testresources/fetch-test-site/exception.html b/src/testresources/fetch-test-site/exception.html
index e1192a176..66f134ee2 100644
--- a/src/testresources/fetch-test-site/exception.html
+++ b/src/testresources/fetch-test-site/exception.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
 <HTML>
 <HEAD>
diff --git a/src/testresources/fetch-test-site/index.html b/src/testresources/fetch-test-site/index.html
index d73ff3f69..3fc6e61e5 100644
--- a/src/testresources/fetch-test-site/index.html
+++ b/src/testresources/fetch-test-site/index.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
  <head>
   <title>front page</title>
diff --git a/src/testresources/fetch-test-site/nested_spider_trap.html b/src/testresources/fetch-test-site/nested_spider_trap.html
index 5dcf7c220..dd32ee236 100644
--- a/src/testresources/fetch-test-site/nested_spider_trap.html
+++ b/src/testresources/fetch-test-site/nested_spider_trap.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
 <head>
 <title>nested spider trap</title>
diff --git a/src/testresources/fetch-test-site/pagea.html b/src/testresources/fetch-test-site/pagea.html
index 6444c4122..63c4e6153 100644
--- a/src/testresources/fetch-test-site/pagea.html
+++ b/src/testresources/fetch-test-site/pagea.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
  <head>
   <title>page a</title>
diff --git a/src/testresources/fetch-test-site/pageb.html b/src/testresources/fetch-test-site/pageb.html
index 66e3725ef..cf77ff4f7 100644
--- a/src/testresources/fetch-test-site/pageb.html
+++ b/src/testresources/fetch-test-site/pageb.html
@@ -1,3 +1,19 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
 <html>
  <head>
   <title>bage b</title>
diff --git a/src/testresources/fetch-test-site/robots.txt b/src/testresources/fetch-test-site/robots.txt
index e69de29bb..fc590f973 100644
--- a/src/testresources/fetch-test-site/robots.txt
+++ b/src/testresources/fetch-test-site/robots.txt
@@ -0,0 +1,14 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
\ No newline at end of file

Reply via email to