This is an automated email from the ASF dual-hosted git repository.
srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new c65f9b2 [SPARK-26839][SQL] Work around classloader changes in Java 9
for Hive isolation
c65f9b2 is described below
commit c65f9b2bc35c2926bb3658f65fe4f8a0b8e9fe4a
Author: Sean Owen <[email protected]>
AuthorDate: Wed Mar 20 09:12:52 2019 -0500
[SPARK-26839][SQL] Work around classloader changes in Java 9 for Hive
isolation
Note: this doesn't fully resolve the JIRA, but it makes the changes that are
possible so far and that a complete fix will require.
## What changes were proposed in this pull request?
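Java 9+ changed how ClassLoaders work. The two most salient points:
- The boot classloader no longer 'sees' the platform classes. A new
'platform classloader' does, and it should be the parent of new ClassLoaders.
- The system classloader is no longer a URLClassLoader, so we can't get the
URLs of JARs in its classpath.
Accordingly, on Java 9+ this patch uses the platform classloader as the parent
of the isolated Hive classloader, and no longer requires that Hive JARs be
found via the system classloader's URLs.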
## How was this patch tested?
We'll see whether Java 8 tests still pass here. Java 11 tests do not fully
pass at this point; more notes below. This does make progress on the failures
though.
(NB: to test with Java 11, you need to build with Java 8 first, setting
JAVA_HOME and java's executable correctly, then switch both to Java 11 for
testing.)
Closes #24057 from srowen/SPARK-26839.
Authored-by: Sean Owen <[email protected]>
Signed-off-by: Sean Owen <[email protected]>
---
.../org/apache/spark/sql/hive/HiveUtils.scala | 20 +++---
.../sql/hive/client/IsolatedClientLoader.scala | 77 +++++++++++++---------
2 files changed, 59 insertions(+), 38 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index 38bbe64..a7f40c6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -19,8 +19,6 @@ package org.apache.spark.sql.hive
import java.io.File
import java.net.{URL, URLClassLoader}
-import java.nio.charset.StandardCharsets
-import java.sql.Timestamp
import java.util.Locale
import java.util.concurrent.TimeUnit
@@ -28,12 +26,11 @@ import scala.collection.JavaConverters._
import scala.collection.mutable.HashMap
import scala.language.implicitConversions
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hive.common.`type`.HiveDecimal
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hadoop.hive.ql.session.SessionState
-import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable}
import org.apache.hadoop.util.VersionInfo
import org.apache.spark.{SparkConf, SparkContext}
@@ -329,10 +326,17 @@ private[spark] object HiveUtils extends Logging {
val classLoader = Utils.getContextOrSparkClassLoader
val jars = allJars(classLoader)
- if (jars.length == 0) {
- throw new IllegalArgumentException(
- "Unable to locate hive jars to connect to metastore. " +
- s"Please set ${HIVE_METASTORE_JARS.key}.")
+ if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+ // Do nothing. The system classloader is no longer a URLClassLoader in
Java 9,
+ // so it won't match the case in allJars above. It no longer exposes
URLs of
+ // the system classpath
+ } else {
+ // Verify at least one jar was found
+ if (jars.length == 0) {
+ throw new IllegalArgumentException(
+ "Unable to locate hive jars to connect to metastore. " +
+ s"Please set ${HIVE_METASTORE_JARS.key}.")
+ }
}
logInfo(
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
index efa97b2..98999eb 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
@@ -25,6 +25,7 @@ import java.util
import scala.util.Try
import org.apache.commons.io.{FileUtils, IOUtils}
+import org.apache.commons.lang3.{JavaVersion, SystemUtils}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
@@ -157,7 +158,6 @@ private[hive] object IsolatedClientLoader extends Logging {
* @param isolationOn When true, custom versions of barrier classes will be
constructed. Must be
* true unless loading the version of hive that is on
Sparks classloader.
* @param sharesHadoopClasses When true, we will share Hadoop classes between
Spark and
- * @param rootClassLoader The system root classloader. Must not know about
Hive classes.
* @param baseClassLoader The spark classloader that is used to load shared
classes.
*/
private[hive] class IsolatedClientLoader(
@@ -168,15 +168,11 @@ private[hive] class IsolatedClientLoader(
val config: Map[String, String] = Map.empty,
val isolationOn: Boolean = true,
val sharesHadoopClasses: Boolean = true,
- val rootClassLoader: ClassLoader =
ClassLoader.getSystemClassLoader.getParent.getParent,
val baseClassLoader: ClassLoader =
Thread.currentThread().getContextClassLoader,
val sharedPrefixes: Seq[String] = Seq.empty,
val barrierPrefixes: Seq[String] = Seq.empty)
extends Logging {
- // Check to make sure that the root classloader does not know about Hive.
-
assert(Try(rootClassLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure)
-
/** All jars used by the hive specific classloader. */
protected def allJars = execJars.toArray
@@ -191,8 +187,8 @@ private[hive] class IsolatedClientLoader(
(sharesHadoopClasses && isHadoopClass) ||
name.startsWith("scala.") ||
(name.startsWith("com.google") && !name.startsWith("com.google.cloud")) ||
- name.startsWith("java.lang.") ||
- name.startsWith("java.net") ||
+ name.startsWith("java.") ||
+ name.startsWith("javax.sql.") ||
sharedPrefixes.exists(name.startsWith)
}
@@ -214,30 +210,51 @@ private[hive] class IsolatedClientLoader(
private[hive] val classLoader: MutableURLClassLoader = {
val isolatedClassLoader =
if (isolationOn) {
- new URLClassLoader(allJars, rootClassLoader) {
- override def loadClass(name: String, resolve: Boolean): Class[_] = {
- val loaded = findLoadedClass(name)
- if (loaded == null) doLoadClass(name, resolve) else loaded
- }
- def doLoadClass(name: String, resolve: Boolean): Class[_] = {
- val classFileName = name.replaceAll("\\.", "/") + ".class"
- if (isBarrierClass(name)) {
- // For barrier classes, we construct a new copy of the class.
- val bytes =
IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName))
- logDebug(s"custom defining: $name -
${util.Arrays.hashCode(bytes)}")
- defineClass(name, bytes, 0, bytes.length)
- } else if (!isSharedClass(name)) {
- logDebug(s"hive class: $name -
${getResource(classToPath(name))}")
- super.loadClass(name, resolve)
+ if (allJars.isEmpty) {
+ // See HiveUtils; this is the Java 9+ + builtin mode scenario
+ baseClassLoader
+ } else {
+ val rootClassLoader: ClassLoader =
+ if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) {
+ // In Java 9, the boot classloader can see few JDK classes. The
intended parent
+ // classloader for delegation is now the platform classloader.
+ // See http://java9.wtf/class-loading/
+ val platformCL =
+ classOf[ClassLoader].getMethod("getPlatformClassLoader").
+ invoke(null).asInstanceOf[ClassLoader]
+ // Check to make sure that the root classloader does not know
about Hive.
+
assert(Try(platformCL.loadClass("org.apache.hadoop.hive.conf.HiveConf")).isFailure)
+ platformCL
} else {
- // For shared classes, we delegate to baseClassLoader, but fall
back in case the
- // class is not found.
- logDebug(s"shared class: $name")
- try {
- baseClassLoader.loadClass(name)
- } catch {
- case _: ClassNotFoundException =>
- super.loadClass(name, resolve)
+ // The boot classloader is represented by null (the instance
itself isn't accessible)
+ // and before Java 9 can see all JDK classes
+ null
+ }
+ new URLClassLoader(allJars, rootClassLoader) {
+ override def loadClass(name: String, resolve: Boolean): Class[_] =
{
+ val loaded = findLoadedClass(name)
+ if (loaded == null) doLoadClass(name, resolve) else loaded
+ }
+ def doLoadClass(name: String, resolve: Boolean): Class[_] = {
+ val classFileName = name.replaceAll("\\.", "/") + ".class"
+ if (isBarrierClass(name)) {
+ // For barrier classes, we construct a new copy of the class.
+ val bytes =
IOUtils.toByteArray(baseClassLoader.getResourceAsStream(classFileName))
+ logDebug(s"custom defining: $name -
${util.Arrays.hashCode(bytes)}")
+ defineClass(name, bytes, 0, bytes.length)
+ } else if (!isSharedClass(name)) {
+ logDebug(s"hive class: $name -
${getResource(classToPath(name))}")
+ super.loadClass(name, resolve)
+ } else {
+ // For shared classes, we delegate to baseClassLoader, but
fall back in case the
+ // class is not found.
+ logDebug(s"shared class: $name")
+ try {
+ baseClassLoader.loadClass(name)
+ } catch {
+ case _: ClassNotFoundException =>
+ super.loadClass(name, resolve)
+ }
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]