This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 506ef9a [SPARK-29250][BUILD] Upgrade to Hadoop 3.3.1
506ef9a is described below
commit 506ef9aad7a826e1cac5300eb1c30262f1c46e51
Author: Chao Sun <[email protected]>
AuthorDate: Wed Jun 16 13:28:07 2021 -0700
[SPARK-29250][BUILD] Upgrade to Hadoop 3.3.1
### What changes were proposed in this pull request?
This upgrades the default Hadoop version from 3.2.2 to 3.3.1. The changes here
simply update the version number and the dependency file.
### Why are the changes needed?
Hadoop 3.3.1 was just released and brings many client-side improvements,
such as those for S3A/ABFS (20% faster when accessing S3). These are important for
users who want to use Spark in a cloud environment.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
- Existing unit tests in Spark
- Manually tested using my S3 bucket for event log dir:
```
bin/spark-shell \
-c spark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID \
-c spark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY \
-c spark.eventLog.enabled=true \
-c spark.eventLog.dir=s3a://<my-bucket>
```
- Manually tested against docker-based YARN dev cluster, by running
`SparkPi`.
Closes #30135 from sunchao/SPARK-29250.
Authored-by: Chao Sun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 7 ++++---
pom.xml | 15 ++++++++++++++-
resource-managers/yarn/pom.xml | 16 ++++++++++++++++
.../spark/sql/hive/client/IsolatedClientLoader.scala | 1 +
4 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index 3482dd2..51203a0 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -57,9 +57,10 @@ flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar
generex/1.0.2//generex-1.0.2.jar
gson/2.2.4//gson-2.2.4.jar
guava/14.0.1//guava-14.0.1.jar
-hadoop-client-api/3.2.2//hadoop-client-api-3.2.2.jar
-hadoop-client-runtime/3.2.2//hadoop-client-runtime-3.2.2.jar
-hadoop-yarn-server-web-proxy/3.2.2//hadoop-yarn-server-web-proxy-3.2.2.jar
+hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar
+hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar
+hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar
+hadoop-yarn-server-web-proxy/3.3.1//hadoop-yarn-server-web-proxy-3.3.1.jar
hive-beeline/2.3.9//hive-beeline-2.3.9.jar
hive-cli/2.3.9//hive-cli-2.3.9.jar
hive-common/2.3.9//hive-common-2.3.9.jar
diff --git a/pom.xml b/pom.xml
index ca038b2..7f0af57 100644
--- a/pom.xml
+++ b/pom.xml
@@ -120,7 +120,7 @@
<sbt.project.name>spark</sbt.project.name>
<slf4j.version>1.7.30</slf4j.version>
<log4j.version>1.2.17</log4j.version>
- <hadoop.version>3.2.2</hadoop.version>
+ <hadoop.version>3.3.1</hadoop.version>
<protobuf.version>2.5.0</protobuf.version>
<yarn.version>${hadoop.version}</yarn.version>
<zookeeper.version>3.6.2</zookeeper.version>
@@ -195,6 +195,7 @@
<maven-antrun.version>1.8</maven-antrun.version>
<commons-crypto.version>1.1.0</commons-crypto.version>
<commons-cli.version>1.2</commons-cli.version>
+ <bouncycastle.version>1.60</bouncycastle.version>
<!--
If you are changing Arrow version specification, please check
./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too.
@@ -1192,6 +1193,18 @@
</exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15on</artifactId>
+ <version>${bouncycastle.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcpkix-jdk15on</artifactId>
+ <version>${bouncycastle.version}</version>
+ <scope>test</scope>
+ </dependency>
<!-- Managed up to match Hadoop in HADOOP-16530 -->
<dependency>
<groupId>xerces</groupId>
diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml
index a662953..00ad7e8 100644
--- a/resource-managers/yarn/pom.xml
+++ b/resource-managers/yarn/pom.xml
@@ -76,6 +76,22 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <id>hadoop-3.2</id>
+ <dependencies>
+ <!-- Used by MiniYARNCluster -->
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcprov-jdk15on</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.bouncycastle</groupId>
+ <artifactId>bcpkix-jdk15on</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
<dependencies>
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 40ef10b..9aa6a09 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -113,6 +113,7 @@ private[hive] object IsolatedClientLoader extends Logging {
def supportsHadoopShadedClient(hadoopVersion: String): Boolean = {
VersionUtils.majorMinorPatchVersion(hadoopVersion).exists {
case (3, 2, v) if v >= 2 => true
+ case (3, 3, v) if v >= 1 => true
case _ => false
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]