This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new ab93729 [SPARK-31381][SPARK-29245][SQL] Upgrade built-in Hive 2.3.6
to 2.3.7
ab93729 is described below
commit ab93729987084ec55f762639a7e7f7cb8dd275e1
Author: Yuming Wang <[email protected]>
AuthorDate: Mon Apr 20 13:38:24 2020 -0700
[SPARK-31381][SPARK-29245][SQL] Upgrade built-in Hive 2.3.6 to 2.3.7
### What changes were proposed in this pull request?
**Hive 2.3.7** fixed these issues:
- HIVE-21508: ClassCastException when initializing HiveMetaStoreClient on
JDK10 or newer
- HIVE-21980: Parsing time can be high in case of deeply nested subqueries
- HIVE-22249: Support Parquet through HCatalog
### Why are the changes needed?
Fix the ClassCastException (CCE) thrown when creating HiveMetaStoreClient in a JDK 11 environment:
[SPARK-29245](https://issues.apache.org/jira/browse/SPARK-29245).
### Does this PR introduce any user-facing change?
No.
### How was this patch tested?
- [x] Test Jenkins with Hadoop 2.7
(https://github.com/apache/spark/pull/28148#issuecomment-616757840)
- [x] Test Jenkins with Hadoop 3.2 on JDK11
(https://github.com/apache/spark/pull/28148#issuecomment-616294353)
- [x] Manual test with a remote Hive metastore.
Hive side:
```
export JAVA_HOME=/usr/lib/jdk1.8.0_221
export PATH=$JAVA_HOME/bin:$PATH
cd /usr/lib/hive-2.3.6 # Start Hive metastore with Hive 2.3.6
bin/schematool -dbType derby -initSchema --verbose
bin/hive --service metastore
```
Spark side:
```
export JAVA_HOME=/usr/lib/jdk-11.0.3
export PATH=$JAVA_HOME/bin:$PATH
build/sbt clean package -Phive -Phadoop-3.2 -Phive-thriftserver
export SPARK_PREPEND_CLASSES=true
bin/spark-sql --conf spark.hadoop.hive.metastore.uris=thrift://localhost:9083
```
Closes #28148 from wangyum/SPARK-31381.
Authored-by: Yuming Wang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit b11e42663be680a0357f2bf7bd8b16afe313eb5e)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 26 +++++++++++-----------
dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 26 +++++++++++-----------
docs/building-spark.md | 4 ++--
docs/sql-data-sources-hive-tables.md | 8 +++----
docs/sql-migration-guide.md | 2 +-
pom.xml | 4 ++--
.../thriftserver/HiveThriftServer2Suites.scala | 4 ++--
.../org/apache/spark/sql/hive/HiveUtils.scala | 2 +-
.../sql/hive/client/IsolatedClientLoader.scala | 3 ++-
.../org/apache/spark/sql/hive/client/package.scala | 2 +-
10 files changed, 41 insertions(+), 40 deletions(-)
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index 7921405..beb6c83 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -79,20 +79,20 @@ hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar
hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar
hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar
hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar
-hive-beeline/2.3.6//hive-beeline-2.3.6.jar
-hive-cli/2.3.6//hive-cli-2.3.6.jar
-hive-common/2.3.6//hive-common-2.3.6.jar
-hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar
-hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar
-hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar
-hive-metastore/2.3.6//hive-metastore-2.3.6.jar
-hive-serde/2.3.6//hive-serde-2.3.6.jar
-hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar
-hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar
-hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar
-hive-shims/2.3.6//hive-shims-2.3.6.jar
+hive-beeline/2.3.7//hive-beeline-2.3.7.jar
+hive-cli/2.3.7//hive-cli-2.3.7.jar
+hive-common/2.3.7//hive-common-2.3.7.jar
+hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar
+hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar
+hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar
+hive-metastore/2.3.7//hive-metastore-2.3.7.jar
+hive-serde/2.3.7//hive-serde-2.3.7.jar
+hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar
+hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar
+hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar
+hive-shims/2.3.7//hive-shims-2.3.7.jar
hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar
-hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar
+hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar
hk2-api/2.6.1//hk2-api-2.6.1.jar
hk2-locator/2.6.1//hk2-locator-2.6.1.jar
hk2-utils/2.6.1//hk2-utils-2.6.1.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index fd4bb7e..69c7cdf 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -78,20 +78,20 @@ hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar
hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar
hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar
hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar
-hive-beeline/2.3.6//hive-beeline-2.3.6.jar
-hive-cli/2.3.6//hive-cli-2.3.6.jar
-hive-common/2.3.6//hive-common-2.3.6.jar
-hive-exec/2.3.6/core/hive-exec-2.3.6-core.jar
-hive-jdbc/2.3.6//hive-jdbc-2.3.6.jar
-hive-llap-common/2.3.6//hive-llap-common-2.3.6.jar
-hive-metastore/2.3.6//hive-metastore-2.3.6.jar
-hive-serde/2.3.6//hive-serde-2.3.6.jar
-hive-shims-0.23/2.3.6//hive-shims-0.23-2.3.6.jar
-hive-shims-common/2.3.6//hive-shims-common-2.3.6.jar
-hive-shims-scheduler/2.3.6//hive-shims-scheduler-2.3.6.jar
-hive-shims/2.3.6//hive-shims-2.3.6.jar
+hive-beeline/2.3.7//hive-beeline-2.3.7.jar
+hive-cli/2.3.7//hive-cli-2.3.7.jar
+hive-common/2.3.7//hive-common-2.3.7.jar
+hive-exec/2.3.7/core/hive-exec-2.3.7-core.jar
+hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar
+hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar
+hive-metastore/2.3.7//hive-metastore-2.3.7.jar
+hive-serde/2.3.7//hive-serde-2.3.7.jar
+hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar
+hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar
+hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar
+hive-shims/2.3.7//hive-shims-2.3.7.jar
hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar
-hive-vector-code-gen/2.3.6//hive-vector-code-gen-2.3.6.jar
+hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar
hk2-api/2.6.1//hk2-api-2.6.1.jar
hk2-locator/2.6.1//hk2-locator-2.6.1.jar
hk2-utils/2.6.1//hk2-utils-2.6.1.jar
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 77ab790..3d12a60 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -83,9 +83,9 @@ Example:
To enable Hive integration for Spark SQL along with its JDBC server and CLI,
add the `-Phive` and `-Phive-thriftserver` profiles to your existing build
options.
-By default Spark will build with Hive 2.3.6.
+By default Spark will build with Hive 2.3.7.
- # With Hive 2.3.6 support
+ # With Hive 2.3.7 support
./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package
## Packaging without Hadoop Dependencies for YARN
diff --git a/docs/sql-data-sources-hive-tables.md
b/docs/sql-data-sources-hive-tables.md
index 22514cd..ae3572c 100644
--- a/docs/sql-data-sources-hive-tables.md
+++ b/docs/sql-data-sources-hive-tables.md
@@ -127,10 +127,10 @@ The following options can be used to configure the
version of Hive that is used
<tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since
Version</th></tr>
<tr>
<td><code>spark.sql.hive.metastore.version</code></td>
- <td><code>2.3.6</code></td>
+ <td><code>2.3.7</code></td>
<td>
Version of the Hive metastore. Available
- options are <code>0.12.0</code> through <code>2.3.6</code> and
<code>3.0.0</code> through <code>3.1.2</code>.
+ options are <code>0.12.0</code> through <code>2.3.7</code> and
<code>3.0.0</code> through <code>3.1.2</code>.
</td>
<td>1.4.0</td>
</tr>
@@ -142,9 +142,9 @@ The following options can be used to configure the version
of Hive that is used
property can be one of three options:
<ol>
<li><code>builtin</code></li>
- Use Hive 2.3.6, which is bundled with the Spark assembly when
<code>-Phive</code> is
+ Use Hive 2.3.7, which is bundled with the Spark assembly when
<code>-Phive</code> is
enabled. When this option is chosen,
<code>spark.sql.hive.metastore.version</code> must be
- either <code>2.3.6</code> or not defined.
+ either <code>2.3.7</code> or not defined.
<li><code>maven</code></li>
Use Hive jars of specified version downloaded from Maven repositories.
This configuration
is not generally recommended for production deployments.
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 8945c13..854c9ea 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -787,7 +787,7 @@ Python UDF registration is unchanged.
Spark SQL is designed to be compatible with the Hive Metastore, SerDes and
UDFs.
Currently, Hive SerDes and UDFs are based on built-in Hive,
and Spark SQL can be connected to different versions of Hive Metastore
-(from 0.12.0 to 2.3.6 and 3.0.0 to 3.1.2. Also see [Interacting with Different
Versions of Hive
Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
+(from 0.12.0 to 2.3.7 and 3.0.0 to 3.1.2. Also see [Interacting with Different
Versions of Hive
Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)).
#### Deploying in Existing Hive Warehouses
{:.no_toc}
diff --git a/pom.xml b/pom.xml
index e5c8724..0547973 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,8 +127,8 @@
<hive.group>org.apache.hive</hive.group>
<hive.classifier>core</hive.classifier>
<!-- Version used in Maven Hive dependency -->
- <hive.version>2.3.6</hive.version>
- <hive23.version>2.3.6</hive23.version>
+ <hive.version>2.3.7</hive.version>
+ <hive23.version>2.3.7</hive23.version>
<!-- Version used for internal directory structure -->
<hive.version.short>2.3</hive.version.short>
<!-- note that this should be compatible with Kafka brokers version 0.10
and up -->
diff --git
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index 84eed7b..28f67cb 100644
---
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -545,7 +545,7 @@ class HiveThriftBinaryServerSuite extends
HiveThriftJdbcTest {
}
if (HiveUtils.isHive23) {
- assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
+ assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7"))
} else {
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
}
@@ -562,7 +562,7 @@ class HiveThriftBinaryServerSuite extends
HiveThriftJdbcTest {
}
if (HiveUtils.isHive23) {
- assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.6"))
+ assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7"))
} else {
assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1"))
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index 3c20e68..d0f7988 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -63,7 +63,7 @@ private[spark] object HiveUtils extends Logging {
val HIVE_METASTORE_VERSION = buildConf("spark.sql.hive.metastore.version")
.doc("Version of the Hive metastore. Available options are " +
- "<code>0.12.0</code> through <code>2.3.6</code> and " +
+ "<code>0.12.0</code> through <code>2.3.7</code> and " +
"<code>3.0.0</code> through <code>3.1.2</code>.")
.version("1.4.0")
.stringConf
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index 5da7b70..42a0ec0 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -104,7 +104,8 @@ private[hive] object IsolatedClientLoader extends Logging {
case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0
case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1
case "2.2" | "2.2.0" => hive.v2_2
- case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" |
"2.3.6" => hive.v2_3
+ case "2.3" | "2.3.0" | "2.3.1" | "2.3.2" | "2.3.3" | "2.3.4" | "2.3.5" |
"2.3.6" | "2.3.7" =>
+ hive.v2_3
case "3.0" | "3.0.0" => hive.v3_0
case "3.1" | "3.1.0" | "3.1.1" | "3.1.2" => hive.v3_1
case version =>
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
index 2631911..8526d86 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala
@@ -89,7 +89,7 @@ package object client {
// Since HIVE-14496, Hive materialized view need calcite-core.
// For spark, only VersionsSuite currently creates a hive materialized
view for testing.
- case object v2_3 extends HiveVersion("2.3.6",
+ case object v2_3 extends HiveVersion("2.3.7",
exclusions = Seq("org.apache.calcite:calcite-druid",
"org.apache.calcite.avatica:avatica",
"org.apache.curator:*",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org