Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/5876#discussion_r29897419
--- Diff:
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala ---
@@ -93,9 +99,129 @@ class HiveContext(sc: SparkContext) extends
SQLContext(sc) {
protected[sql] def convertCTAS: Boolean =
getConf("spark.sql.hive.convertCTAS", "false").toBoolean
+ /**
+ * The version of the hive client that will be used to communicate with
the metastore. Note that
+ * this does not necessarily need to be the same version of Hive that is
used internally by
+ * Spark SQL for execution.
+ */
+ protected[hive] def hiveMetastoreVersion: String =
+ getConf(HIVE_METASTORE_VERSION, "0.13.1")
+
+ /**
+ * The location of the jars that should be used to instantiate the
HiveMetastoreClient. This
+ * property can be one of three options:
+ * - a colon-separated list of jar files or directories for hive and
hadoop.
+ * - builtin - attempt to discover the jars that were used to load
Spark SQL and use those. This
+ * option is only valid when using the execution version of
Hive.
+ * - maven - download the correct version of hive on demand from maven.
+ */
+ protected[hive] def hiveMetastoreJars: String =
+ getConf(HIVE_METASTORE_JARS, "builtin")
+
@transient
protected[sql] lazy val substitutor = new VariableSubstitution()
+
+ /** A local instance of hive that is only used for execution. */
+ protected[hive] lazy val localMetastore = {
+ val temp = Utils.createTempDir()
+ temp.delete()
+ temp
+ }
+
+ @transient
+ protected[hive] lazy val executionConf = new HiveConf()
+ executionConf.set(
+ "javax.jdo.option.ConnectionURL",
s"jdbc:derby:;databaseName=$localMetastore;create=true")
+
+ /** The version of hive used internally by Spark SQL. */
+ lazy val hiveExecutionVersion: String = "0.13.1"
+
+ /**
+ * The copy of the hive client that is used for execution. Currently
this must always be
+ * Hive 13 as this is the version of Hive that is packaged with Spark
SQL. This copy of the
+ * client is used for execution related tasks like registering temporary
functions or ensuring
+ * that the ThreadLocal SessionState is correctly populated. This copy
of Hive is *not* used
+ * for storing persistent metadata, and only points to a dummy metastore
in a temporary directory.
+ */
+ @transient
+ protected[hive] lazy val executionHive: ClientWrapper = {
+ logInfo(s"Initializing execution hive, version $hiveExecutionVersion")
+ new ClientWrapper(
+ version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion),
+ config = Map(
+ "javax.jdo.option.ConnectionURL" ->
+ s"jdbc:derby:;databaseName=$localMetastore;create=true"))
+ }
+ SessionState.setCurrentSessionState(executionHive.state)
+
+ /**
+ * The copy of the Hive client that is used to retrieve metadata from
the Hive MetaStore.
+ * The version of the Hive client that is used here must match the
metastore that is configured
+ * in the hive-site.xml file.
+ */
+ @transient
+ protected[hive] lazy val metadataHive: ClientInterface = {
+ val metaVersion =
IsolatedClientLoader.hiveVersion(hiveMetastoreVersion)
+
+ // We instantiate a HiveConf here to read in the hive-site.xml file
and then pass the options
+ // into the isolated client loader
+ val metadataConf = new HiveConf()
+ // `configure` goes second to override other settings.
+ val allConfig = metadataConf.iterator.map(e => e.getKey ->
e.getValue).toMap ++ configure
+
+ val isolatedLoader = if (hiveMetastoreJars == "builtin") {
+ if (hiveExecutionVersion != hiveMetastoreVersion) {
+ throw new IllegalArgumentException(
+ "Builtin jars can only be used when hive execution version ==
hive metastore version. " +
+ s"Execution: ${hiveExecutionVersion} != Metastore:
${hiveMetastoreVersion}. " +
+ "Specify a valid path to the correct hive jars using
$HIVE_METASTORE_JARS " +
+ s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
+ }
+ val jars = getClass.getClassLoader match {
+ case urlClassLoader: java.net.URLClassLoader =>
urlClassLoader.getURLs
+ case other =>
+ throw new IllegalArgumentException(
+ "Unable to locate hive jars to connect to metastore " +
+ s"using classloader ${other.getClass.getName}. " +
+ "Please set spark.sql.hive.metastore.jars")
+ }
+
+ logInfo(
+ s"Initializing HiveMetastoreConnection version
$hiveMetastoreVersion using Spark classes.")
+ new IsolatedClientLoader(
+ version = metaVersion,
+ execJars = jars.toSeq,
+ config = allConfig,
+ isolationOn = true)
+ } else if (hiveMetastoreJars == "maven") {
+ // TODO: Support for loading the jars from an already downloaded
location.
+ logInfo(
+ s"Initializing HiveMetastoreConnection version
$hiveMetastoreVersion using maven.")
+ IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig )
+ } else {
+ // Convert to files and expand any directories.
+ val jars =
+ hiveMetastoreJars
+ .split(":")
+ .map(new java.io.File(_))
+ .flatMap {
+ case f if f.isDirectory => f.listFiles()
--- End diff --
Hmmm... this is not how classpaths generally work. Directories are added
directly to the classpath, not its children. This means that I can't add an
exploded jar (i.e. a directory with a bunch of classes in it) using this config.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]