This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 1156801bc NUTCH-3040 Upgrade to Hadoop 3.4.2 (#866)
1156801bc is described below
commit 1156801bce618980122d3dee5e92ad9587b9c5a4
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Mon Nov 10 07:07:44 2025 -0800
NUTCH-3040 Upgrade to Hadoop 3.4.2 (#866)
---
default.properties | 2 +-
ivy/ivy.xml | 20 +++++++++-----------
.../apache/nutch/service/impl/ConfManagerImpl.java | 2 +-
.../nutch/service/impl/NutchServerPoolExecutor.java | 2 +-
.../apache/nutch/service/resources/SeedResource.java | 2 +-
src/java/org/apache/nutch/tools/FileDumper.java | 2 +-
6 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/default.properties b/default.properties
index 6008563bc..4b5608647 100644
--- a/default.properties
+++ b/default.properties
@@ -43,7 +43,7 @@ test.build.javadoc = ${test.build.dir}/docs/api
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
javadoc.link.java=https://docs.oracle.com/en/java/javase/11/docs/api/
-javadoc.link.hadoop=https://hadoop.apache.org/docs/r3.3.6/api/
+javadoc.link.hadoop=https://hadoop.apache.org/docs/r3.4.2/api/
javadoc.packages=org.apache.nutch.*
dist.dir=./dist
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index f3c9badde..a68b5890c 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -43,38 +43,36 @@
<dependency org="org.apache.logging.log4j"
name="log4j-slf4j2-impl" rev="2.25.0" conf="*->master" />
<dependency org="org.slf4j" name="slf4j-api" rev="2.0.17"
conf="*->master" />
- <dependency org="org.apache.commons" name="commons-lang3"
rev="3.13.0" conf="*->default" />
+ <dependency org="org.apache.commons" name="commons-lang3"
rev="3.17.0" conf="*->default" />
<dependency org="org.apache.commons"
name="commons-collections4" rev="4.4" conf="*->master" />
<dependency org="org.apache.httpcomponents" name="httpclient"
rev="4.5.14" conf="*->master" />
<!-- commons-httpclient is still required -->
<dependency org="commons-httpclient" name="commons-httpclient"
rev="3.1"/>
<dependency org="commons-codec" name="commons-codec"
rev="1.18.0" conf="*->default" />
- <!-- hadoop 3.4.0 should have 2.11.0; Tika is broken in
distributed mode until then;
- We're currently relying on the hadoop-safe-tika
shim that shades commons-io to upgrade
- Tika
- see https://github.com/apache/nutch/pull/776 -->
- <dependency org="commons-io" name="commons-io" rev="2.11.0"
conf="*->default" />
+
+ <dependency org="commons-io" name="commons-io" rev="2.16.1"
conf="*->default" />
<dependency org="org.apache.commons" name="commons-compress"
rev="1.27.1" conf="*->default" />
<dependency org="org.apache.commons" name="commons-jexl3"
rev="3.5.0" conf="*->default" />
<dependency org="com.tdunning" name="t-digest" rev="3.3" />
<!-- Hadoop Dependencies -->
- <dependency org="org.apache.hadoop" name="hadoop-common"
rev="3.3.6" conf="*->default">
+ <dependency org="org.apache.hadoop" name="hadoop-common"
rev="3.4.2" conf="*->default">
<exclude org="ch.qos.reload4j" name="*"/>
<exclude org="org.slf4j" name="*" />
</dependency>
- <dependency org="org.apache.hadoop" name="hadoop-hdfs"
rev="3.3.6" conf="*->default">
+ <dependency org="org.apache.hadoop" name="hadoop-hdfs"
rev="3.4.2" conf="*->default">
<exclude org="ch.qos.reload4j" name="*"/>
<exclude org="org.slf4j" name="*" />
</dependency>
- <dependency org="org.apache.hadoop"
name="hadoop-mapreduce-client-core" rev="3.3.6" conf="*->default">
+ <dependency org="org.apache.hadoop"
name="hadoop-mapreduce-client-core" rev="3.4.2" conf="*->default">
<exclude org="ch.qos.reload4j" name="*"/>
<exclude org="org.slf4j" name="*" />
</dependency>
- <dependency org="org.apache.hadoop"
name="hadoop-mapreduce-client-jobclient" rev="3.3.6" conf="*->default">
+ <dependency org="org.apache.hadoop"
name="hadoop-mapreduce-client-jobclient" rev="3.4.2" conf="*->default">
<exclude org="ch.qos.reload4j" name="*"/>
<exclude org="org.slf4j" name="*" />
- </dependency><!-- End of Hadoop Dependencies -->
+ </dependency>
+ <!-- End of Hadoop Dependencies -->
<dependency org="org.tallison.tika" name="tika-core-shaded"
rev="2.9.1.0" conf="*->default" transitive="false"/>
diff --git a/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java b/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java
index ebcc575ac..7db24d319 100644
--- a/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java
+++ b/src/java/org/apache/nutch/service/impl/ConfManagerImpl.java
@@ -23,7 +23,7 @@ import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.commons.collections.MapUtils;
+import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.service.ConfManager;
diff --git a/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java b/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
index 473062217..1d1e8175b 100644
--- a/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
+++ b/src/java/org/apache/nutch/service/impl/NutchServerPoolExecutor.java
@@ -23,7 +23,7 @@ import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
-import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.nutch.service.model.response.JobInfo;
diff --git a/src/java/org/apache/nutch/service/resources/SeedResource.java b/src/java/org/apache/nutch/service/resources/SeedResource.java
index 5c0815bef..a1a555141 100644
--- a/src/java/org/apache/nutch/service/resources/SeedResource.java
+++ b/src/java/org/apache/nutch/service/resources/SeedResource.java
@@ -30,7 +30,7 @@ import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
-import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.collections4.CollectionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.nutch.service.NutchServer;
diff --git a/src/java/org/apache/nutch/tools/FileDumper.java b/src/java/org/apache/nutch/tools/FileDumper.java
index efc7e29c2..79672d2a0 100644
--- a/src/java/org/apache/nutch/tools/FileDumper.java
+++ b/src/java/org/apache/nutch/tools/FileDumper.java
@@ -51,7 +51,7 @@ import org.apache.tika.Tika;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.codehaus.jackson.map.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectMapper;
/**
* The file dumper tool enables one to reverse generate the raw content from
* Nutch segment data directories.