This is an automated email from the ASF dual-hosted git repository. lewismc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new ebf348c Prepare for Nutch 1.19-SNAPSHOT development ebf348c is described below commit ebf348cc6ec88a15ca0243c12fe18c31157ede89 Author: Lewis John McGibbney <lewis.mcgibb...@gmail.com> AuthorDate: Mon Jan 25 20:05:00 2021 -0800 Prepare for Nutch 1.19-SNAPSHOT development --- CHANGES.txt | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- NOTICE.txt | 2 +- build.xml | 25 ++++++++++++++++--------- conf/nutch-default.xml | 2 +- default.properties | 4 ++-- ivy/mvn.template | 12 +++++++++++- src/bin/nutch | 2 +- 7 files changed, 79 insertions(+), 17 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index e5c5984..9946bc9 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,10 +1,55 @@ # Nutch Change Log -Nutch 1.18 Development +Nutch 1.18 Release 14/01/2021 (dd/mm/yyyy) +Release Report: https://s.apache.org/lqara Breaking Changes - - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. And the fields required for the plugin urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file has been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file respectively. See NUTCH-2802 for more details. + - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. And the fields required for the plugin urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file has been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file respectively. See NUTCH-2802 for more details. 
+ +Sub-task + + [NUTCH-2671] - Upgrade ant ivy library + [NUTCH-2672] - Ant build erroneously installs *-test.jar instead of *.jar for target "nightly" + [NUTCH-2805] - Rename plugin urlfilter-domainblacklist + [NUTCH-2809] - Upgrade any23 plugin dependency to 2.4 + [NUTCH-2816] - Add Spotbugs target to ant build + [NUTCH-2817] - Avoid check for equality of URL path and file part using ==/!= + [NUTCH-2829] - Fix ant target "clean-cache" + +Bug + + [NUTCH-2669] - Reliable solution for javax.ws packaging.type + [NUTCH-2697] - Upgrade Ivy to fix the issue of an unset packaging.type property + [NUTCH-2801] - RobotsRulesParser command-line checker to use http.robots.agents as fall-back + [NUTCH-2810] - FreeGenerator to actually apply configured number of fetch lists + [NUTCH-2813] - MoreIndexingFilter - can't parse erroneous date - 2019-07-03T10:28:14 + [NUTCH-2814] - HttpDateFormat's internal time zone may change after parsing a date + [NUTCH-2818] - Ant build: upgrade Apache Rat report task + [NUTCH-2823] - IllegalStateException in IndexWriters.describe() when validating url param for SolrIndexer + [NUTCH-2824] - urlnormalizer-basic to unescape percent-encoded host names + +Improvement + + [NUTCH-1190] - MoreIndexingFilter refactor: move data formats used to parse "lastModified" to a config file. 
+ [NUTCH-2582] - Set pool size of XML SAX parsers used for MIME detection in Tika 1.19 + [NUTCH-2730] - SitemapProcessor to treat sitemap URLs as Set instead of List + [NUTCH-2782] - protocol-http / lib-http: support TLSv1.3 + [NUTCH-2796] - Upgrade to crawler-commons 1.1 + [NUTCH-2799] - Add .asf.yaml file + [NUTCH-2833] - Upgrade to Tika 1.25 + [NUTCH-2835] - Upgrade commons-jexl from 2 --> 3 + [NUTCH-2836] - Upgrade various commons dependencies + [NUTCH-2837] - Update multiple dependencies + [NUTCH-2841] - Upgrade xercesImpl dependency + +Wish + + [NUTCH-2834] - Deduplication mode via command line in crawl script + +Task + + [NUTCH-2830] - Upgrade any23 to v2.4 Nutch 1.17 Release 18/06/2020 (dd/mm/yyyy) Release Report: https://s.apache.org/ovhry diff --git a/NOTICE.txt b/NOTICE.txt index 71f29fa..1c9efd0 100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1,5 +1,5 @@ Apache Nutch -Copyright 2020 The Apache Software Foundation +Copyright 2021 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). 
diff --git a/build.xml b/build.xml index 62ed5d1..68a0f44 100644 --- a/build.xml +++ b/build.xml @@ -37,6 +37,8 @@ <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" /> <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" /> + <property environment="env"/> + <property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/> <property name="spotbugs.version" value="4.1.1" /> @@ -311,8 +313,9 @@ </ivy:makepom> <!-- sign and deploy the main artifact --> - <artifact:mvn> - <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" /> + <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true"> + <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" /> + <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" /> <arg value="-Durl=${maven-repository-url}" /> <arg value="-DrepositoryId=${maven-repository-id}" /> <arg value="-DpomFile=pom.xml" /> @@ -321,8 +324,9 @@ </artifact:mvn> <!-- sign and deploy the sources artifact --> - <artifact:mvn> - <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" /> + <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true"> + <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" /> + <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" /> <arg value="-Durl=${maven-repository-url}" /> <arg value="-DrepositoryId=${maven-repository-id}" /> <arg value="-DpomFile=pom.xml" /> @@ -332,8 +336,9 @@ </artifact:mvn> <!-- sign and deploy the javadoc artifact --> - <artifact:mvn> - <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" /> + <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true"> + <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" /> + <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" /> <arg 
value="-Durl=${maven-repository-url}" /> <arg value="-DrepositoryId=${maven-repository-id}" /> <arg value="-DpomFile=pom.xml" /> @@ -362,10 +367,12 @@ </dependency> </artifact:dependencies--> - <artifact:mvn> - <arg value="test"/> + <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true"> + <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" /> + <arg value="package"/> + <arg value="-DskipTests"/> <arg value="-e"/> - <arg value="-o"/> + <!--arg value="-o"/--> <!-- run offline (-o): must not download dependencies as this is done from http://repo1.maven.org/ hardwired in maven-ant-tasks-2.1.3.jar, see NUTCH-2722. diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml index 6932eb5..36c6f86 100644 --- a/conf/nutch-default.xml +++ b/conf/nutch-default.xml @@ -164,7 +164,7 @@ <property> <name>http.agent.version</name> - <value>Nutch-1.18-SNAPSHOT</value> + <value>Nutch-1.19-SNAPSHOT</value> <description>A version string to advertise in the User-Agent header.</description> </property> diff --git a/default.properties b/default.properties index e4b9619..a675853 100644 --- a/default.properties +++ b/default.properties @@ -14,9 +14,9 @@ # limitations under the License. 
name=apache-nutch -version=1.18-SNAPSHOT +version=1.19-SNAPSHOT final.name=${name}-${version} -year=2020 +year=2021 basedir = ./ src.dir = ./src/java diff --git a/ivy/mvn.template b/ivy/mvn.template index 6d22c84..edfb550 100644 --- a/ivy/mvn.template +++ b/ivy/mvn.template @@ -19,6 +19,11 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache</groupId> + <artifactId>apache</artifactId> + <version>23</version> + </parent> <groupId>${ivy.pom.groupId}</groupId> <artifactId>${ivy.pom.artifactId}</artifactId> <packaging>${ivy.pom.packaging}</packaging> @@ -46,6 +51,10 @@ <name>MireDot Releases</name> <url>http://nexus.qmino.com/content/repositories/miredot</url> </pluginRepository> + <pluginRepository> + <id>maven2</id> + <url>https://repo.maven.apache.org/maven2/</url> + </pluginRepository> </pluginRepositories> <developers> @@ -119,6 +128,7 @@ <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> + <version>3.8.1</version> <configuration> <source>1.8</source> <target>1.8</target> @@ -130,7 +140,7 @@ <plugin> <groupId>com.qmino</groupId> <artifactId>miredot-plugin</artifactId> - <version>2.0.1</version> + <version>2.4.0</version> <executions> <execution> <goals> diff --git a/src/bin/nutch b/src/bin/nutch index 7d0d8ee..9240a9e 100755 --- a/src/bin/nutch +++ b/src/bin/nutch @@ -60,7 +60,7 @@ done # if no args specified, show usage if [ $# = 0 ]; then - echo "nutch 1.18-SNAPSHOT" + echo "nutch 1.19-SNAPSHOT" echo "Usage: nutch COMMAND [-Dproperty=value]... [command-specific args]..." echo "where COMMAND is one of:" echo " readdb read / dump crawl db"