Github user yhuai commented on a diff in the pull request:
https://github.com/apache/spark/pull/9979#discussion_r46005696
--- Diff:
sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala
---
@@ -34,23 +34,54 @@ import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.util.{MutableURLClassLoader, Utils}
/** Factory for `IsolatedClientLoader` with specific versions of hive. */
-private[hive] object IsolatedClientLoader {
+private[hive] object IsolatedClientLoader extends Logging {
/**
* Creates isolated Hive client loaders by downloading the requested
version from maven.
*/
def forVersion(
- version: String,
+ hiveMetastoreVersion: String,
+ hadoopVersion: String,
config: Map[String, String] = Map.empty,
ivyPath: Option[String] = None,
sharedPrefixes: Seq[String] = Seq.empty,
barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader =
synchronized {
- val resolvedVersion = hiveVersion(version)
- val files = resolvedVersions.getOrElseUpdate(resolvedVersion,
- downloadVersion(resolvedVersion, ivyPath))
+ val resolvedVersion = hiveVersion(hiveMetastoreVersion)
+ // We will first try to share Hadoop classes. If we cannot resolve the
Hadoop artifact
+ // with the given version, we will use Hadoop 2.4.0 and then will not
share Hadoop classes.
+ var sharesHadoopClasses = true
+ val files = if (resolvedVersions.contains((resolvedVersion,
hadoopVersion))) {
+ resolvedVersions((resolvedVersion, hadoopVersion))
+ } else {
+ val (downloadedFiles, actualHadoopVersion) =
+ try {
+ (downloadVersion(resolvedVersion, hadoopVersion, ivyPath),
hadoopVersion)
+ } catch {
+ case e: RuntimeException if e.getMessage.contains("hadoop") =>
+ // If the error message contains hadoop, it is probably
because the hadoop
+ // version cannot be resolved (e.g. it is a vendor specific
version like
+ // 2.0.0-cdh4.1.1). If it is the case, we will try just
+ // "org.apache.hadoop:hadoop-client:2.4.0".
"org.apache.hadoop:hadoop-client:2.4.0"
+ // is used just because we used to hard code it as the hadoop
artifact to download.
+ logWarning(s"Failed to resolve Hadoop artifacts for the
version ${hadoopVersion}. " +
+ s"We will change the hadoop version from ${hadoopVersion} to
2.4.0 and try again. " +
+ "Hadoop classes will not be shared between Spark and Hive
metastore client. " +
+ "It is recommended to set jars used by Hive metastore client
through " +
+ "spark.sql.hive.metastore.jars in the production
environment.")
+ sharesHadoopClasses = false
+ (downloadVersion(resolvedVersion, "2.4.0", ivyPath), "2.4.0")
+ case throwable: Throwable =>
+ // If it is other causes, we just re-throw the Throwable.
+ throw throwable
--- End diff --
Done
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]