Repository: nutch Updated Branches: refs/heads/NUTCH-2292 3f1cf76fb -> 9173fd4d6
fixed few test dependencies and configured runtime/local/lib the old way Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/9173fd4d Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/9173fd4d Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/9173fd4d Branch: refs/heads/NUTCH-2292 Commit: 9173fd4d67d1f1429a063e523e02e476cd38c44b Parents: 3f1cf76 Author: Thamme Gowda <[email protected]> Authored: Wed Jul 6 10:11:33 2016 -0700 Committer: Thamme Gowda <[email protected]> Committed: Wed Jul 6 10:11:33 2016 -0700 ---------------------------------------------------------------------- nutch-core/pom.xml | 21 +++++++++++++- .../org/apache/nutch/test/IntegrationTest.java | 6 ++++ .../java/org/apache/nutch/test/TestUtils.java | 29 ++++++++++++++++++++ .../nutch/TestCCParseFilter.java | 10 +++---- nutch-plugins/index-replace/pom.xml | 12 ++++++++ nutch-plugins/lib-nekohtml/pom.xml | 7 +++++ nutch-plugins/lib-regex-filter/pom.xml | 16 +++++++++++ nutch-plugins/parse-tika/pom.xml | 9 ++++++ nutch-plugins/pom.xml | 10 ++++++- nutch-plugins/protocol-http/pom.xml | 12 ++++++++ nutch-plugins/protocol-httpclient/pom.xml | 12 ++++++++ nutch-plugins/urlfilter-automaton/pom.xml | 8 ++++++ nutch-plugins/urlfilter-regex/pom.xml | 7 +++++ pom.xml | 25 ++++++++++++++++- 14 files changed, 176 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-core/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-core/pom.xml b/nutch-core/pom.xml index ff05092..e358f50 100644 --- a/nutch-core/pom.xml +++ b/nutch-core/pom.xml @@ -34,7 +34,7 @@ such as a crawler, a link-graph database etc. </description> <url>http://nutch.apache.org</url> - <licenses> + <licenses> <license> <name>The Apache Software License, Version 2.0</name> <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> @@ -113,6 +113,7 @@ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <slf4j.version>1.7.12</slf4j.version> <junit.version>4.12</junit.version> + <libs.dir>${project.parent.basedir}${file.separator}${libs.subdir}</libs.dir> </properties> <dependencies> @@ -452,4 +453,22 @@ <optional>true</optional> </dependency> </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-core/src/test/java/org/apache/nutch/test/IntegrationTest.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/test/java/org/apache/nutch/test/IntegrationTest.java b/nutch-core/src/test/java/org/apache/nutch/test/IntegrationTest.java new file mode 100644 index 0000000..131b667 --- /dev/null +++ b/nutch-core/src/test/java/org/apache/nutch/test/IntegrationTest.java @@ -0,0 +1,6 @@ +package org.apache.nutch.test; + +/** + * A marker interface for marking integration tests + */ +public interface IntegrationTest {} http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-core/src/test/java/org/apache/nutch/test/TestUtils.java ---------------------------------------------------------------------- diff --git a/nutch-core/src/test/java/org/apache/nutch/test/TestUtils.java b/nutch-core/src/test/java/org/apache/nutch/test/TestUtils.java new file mode 100644 index 0000000..87d37a5 --- /dev/null +++ b/nutch-core/src/test/java/org/apache/nutch/test/TestUtils.java @@ -0,0 +1,29 @@ +package org.apache.nutch.test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.net.URISyntaxException; +import java.net.URL; + +public class TestUtils { + + /** + * + * @param obj an object whose class's loader should be used + * @param fileName name of file + * @return File instance + * @throws FileNotFoundException when an error occurs or file is not found + */ + public static File getFile(Object obj, String fileName) + throws FileNotFoundException { + try { + URL resource = obj.getClass().getClassLoader().getResource(fileName); + if (resource == null) { + throw new FileNotFoundException(fileName + " not known to classloader of " + obj); + } + return new File(resource.toURI()); + } catch (URISyntaxException e) { + throw new FileNotFoundException(e.getMessage()); + } + } +} http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/creativecommons/src/test/java/org/creativecommons/nutch/TestCCParseFilter.java ---------------------------------------------------------------------- diff --git a/nutch-plugins/creativecommons/src/test/java/org/creativecommons/nutch/TestCCParseFilter.java b/nutch-plugins/creativecommons/src/test/java/org/creativecommons/nutch/TestCCParseFilter.java index 41be9ed..5beb47d 100755 --- a/nutch-plugins/creativecommons/src/test/java/org/creativecommons/nutch/TestCCParseFilter.java +++ b/nutch-plugins/creativecommons/src/test/java/org/creativecommons/nutch/TestCCParseFilter.java @@ -25,24 +25,24 @@ import org.apache.hadoop.conf.Configuration; import org.apache.nutch.util.NutchConfiguration; import org.junit.Assert; import org.junit.Test; +import static org.apache.nutch.test.TestUtils.getFile; import java.io.*; public class TestCCParseFilter { - private static final File testDir = new File(System.getProperty("test.input")); - @Test public void testPages() throws Exception { - pageTest(new File(testDir, "anchor.html"), "http://foo.com/", + + pageTest(getFile(this, "anchor.html"), "http://foo.com/", "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null); // Tika returns <a> whereas parse-html returns <rel> // check later - pageTest(new File(testDir, "rel.html"), "http://foo.com/", + pageTest(getFile(this, "rel.html"), "http://foo.com/", "http://creativecommons.org/licenses/by-nc/2.0", "rel", null); // Tika returns <a> whereas parse-html returns <rdf> // check later - pageTest(new File(testDir, "rdf.html"), "http://foo.com/", + pageTest(getFile(this, "rdf.html"), "http://foo.com/", "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text"); } http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/index-replace/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/index-replace/pom.xml b/nutch-plugins/index-replace/pom.xml index 599170c..d39851d 100644 --- a/nutch-plugins/index-replace/pom.xml +++ b/nutch-plugins/index-replace/pom.xml @@ -34,5 +34,17 @@ <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> + <dependencies> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>index-basic</artifactId> + <version>${parent.version}</version> + </dependency> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>index-metadata</artifactId> + <version>${parent.version}</version> + </dependency> + </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/lib-nekohtml/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/lib-nekohtml/pom.xml b/nutch-plugins/lib-nekohtml/pom.xml index e51d61d..df544bb 100644 --- a/nutch-plugins/lib-nekohtml/pom.xml +++ b/nutch-plugins/lib-nekohtml/pom.xml @@ -35,4 +35,11 @@ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> + <dependencies> + <dependency> + <groupId>net.sourceforge.nekohtml</groupId> + <artifactId>nekohtml</artifactId> + <version>1.9.22</version> + </dependency> + </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/lib-regex-filter/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/lib-regex-filter/pom.xml b/nutch-plugins/lib-regex-filter/pom.xml index 59f4b10..1074ad7 100644 --- a/nutch-plugins/lib-regex-filter/pom.xml +++ b/nutch-plugins/lib-regex-filter/pom.xml @@ -34,5 +34,21 @@ <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <version>2.6</version> + <executions> + <execution> + <goals> + <goal>test-jar</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/parse-tika/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/parse-tika/pom.xml b/nutch-plugins/parse-tika/pom.xml index f2d9db8..0cf2340 100644 --- a/nutch-plugins/parse-tika/pom.xml +++ b/nutch-plugins/parse-tika/pom.xml @@ -39,6 +39,15 @@ <groupId>org.apache.tika</groupId> <artifactId>tika-parsers</artifactId> <version>1.13</version> + <exclusions> + <!-- TODO --> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>lib-nekohtml</artifactId> + <version>${project.parent.version}</version> + <scope>test</scope> </dependency> </dependencies> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/pom.xml b/nutch-plugins/pom.xml index a3f5656..e6a6abd 100644 --- a/nutch-plugins/pom.xml +++ b/nutch-plugins/pom.xml @@ -101,14 +101,22 @@ </modules> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <libs.dir>..${file.separator}..${file.separator}${libs.subdir}</libs.dir> <!-- Note : one additional level is for the child modules--> </properties> <dependencies> <dependency> <groupId>org.apache.nutch</groupId> <artifactId>nutch-core</artifactId> - <version>1.13-SNAPSHOT</version> + <version>${project.parent.version}</version> <scope>provided</scope> </dependency> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>nutch-core</artifactId> + <version>${project.parent.version}</version> + <scope>test</scope> + <type>test-jar</type> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/protocol-http/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/protocol-http/pom.xml b/nutch-plugins/protocol-http/pom.xml index 0d6b1e5..e7ade28 100644 --- a/nutch-plugins/protocol-http/pom.xml +++ b/nutch-plugins/protocol-http/pom.xml @@ -40,6 +40,18 @@ <artifactId>lib-http</artifactId> <version>${project.parent.version}</version> </dependency> + <dependency> + <groupId> org.mortbay.jetty</groupId> + <artifactId>jetty</artifactId> + <version>6.1.26</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId> org.mortbay.jetty</groupId> + <artifactId>jsp-2.1</artifactId> + <version>6.1.14</version> + <scope>test</scope> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/protocol-httpclient/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/protocol-httpclient/pom.xml b/nutch-plugins/protocol-httpclient/pom.xml index 4fa2950..2f2fc7c 100644 --- a/nutch-plugins/protocol-httpclient/pom.xml +++ b/nutch-plugins/protocol-httpclient/pom.xml @@ -45,6 +45,18 @@ <artifactId>lib-http</artifactId> <version>${project.parent.version}</version> </dependency> + <dependency> + <groupId> org.mortbay.jetty</groupId> + <artifactId>jetty</artifactId> + <version>6.1.26</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId> org.mortbay.jetty</groupId> + <artifactId>jsp-2.1</artifactId> + <version>6.1.14</version> + <scope>test</scope> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/urlfilter-automaton/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/urlfilter-automaton/pom.xml b/nutch-plugins/urlfilter-automaton/pom.xml index 14a2d07..898944e 100644 --- a/nutch-plugins/urlfilter-automaton/pom.xml +++ b/nutch-plugins/urlfilter-automaton/pom.xml @@ -45,6 +45,14 @@ <artifactId>lib-regex-filter</artifactId> <version>${project.parent.version}</version> </dependency> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>lib-regex-filter</artifactId> + <version>${project.parent.version}</version> + <scope>test</scope> + <type>test-jar</type> + </dependency> + </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/nutch-plugins/urlfilter-regex/pom.xml ---------------------------------------------------------------------- diff --git a/nutch-plugins/urlfilter-regex/pom.xml b/nutch-plugins/urlfilter-regex/pom.xml index 237a5b9..db9e7bd 100644 --- a/nutch-plugins/urlfilter-regex/pom.xml +++ b/nutch-plugins/urlfilter-regex/pom.xml @@ -41,6 +41,13 @@ <artifactId>lib-regex-filter</artifactId> <version>${project.parent.version}</version> </dependency> + <dependency> + <groupId>org.apache.nutch</groupId> + <artifactId>lib-regex-filter</artifactId> + <version>${project.parent.version}</version> + <scope>test</scope> + <type>test-jar</type> + </dependency> </dependencies> </project> http://git-wip-us.apache.org/repos/asf/nutch/blob/9173fd4d/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 8cffbc2..268ab2d 100644 --- a/pom.xml +++ b/pom.xml @@ -10,6 +10,8 @@ <packaging>pom</packaging> <properties> + <libs.subdir>runtime${file.separator}local${file.separator}lib</libs.subdir> + <libs.dir>${project.basedir}${file.separator}${libs.subdir}</libs.dir> <junit.version>4.12</junit.version> </properties> <modules> @@ -32,12 +34,33 @@ <configuration> <filesets> <fileset> - <directory>runtime/</directory> + <directory>runtime</directory> <followSymlinks>false</followSymlinks> </fileset> </filesets> </configuration> </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <version>2.10</version> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>${libs.dir}</outputDirectory> + <overWriteReleases>false</overWriteReleases> + <overWriteSnapshots>false</overWriteSnapshots> + <overWriteIfNewer>true</overWriteIfNewer> + <includeScope>runtime</includeScope> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> <dependencies>
