Repository: spark
Updated Branches:
  refs/heads/master 2eedc00b0 -> 5184df06b


[SPARK-16793][SQL] Set the temporary warehouse path to sc's conf in TestHive.

## What changes were proposed in this pull request?

With SPARK-15034, the value of spark.sql.warehouse.dir can be used to set the 
warehouse location. In TestHive, we can now simply set the temporary warehouse 
path in sc's conf, so the param "warehousePath" can be removed.
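
For illustration, a minimal sketch of the new flow (hypothetical test setup, not part of this patch; `Utils.createTempDir` is Spark's internal helper, and the delete mirrors `TestHiveContext.makeWarehouseDir` added below):

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.util.Utils

// Create a temp dir, then delete it so only the path remains;
// Hive recreates the directory on first use.
val warehouseDir = Utils.createTempDir(namePrefix = "warehouse")
warehouseDir.delete()

// The temporary warehouse now rides along in sc's conf, so no
// separate warehousePath parameter has to be threaded through.
val conf = new SparkConf()
  .set("spark.sql.warehouse.dir", warehouseDir.toURI.getPath)
val sc = new SparkContext("local[2]", "TestSQLContext", conf)
```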

## How was this patch tested?

Existing test suites.

Author: jiangxingbo <jiangxin...@meituan.com>

Closes #14401 from jiangxb1987/warehousePath.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5184df06
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5184df06
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5184df06

Branch: refs/heads/master
Commit: 5184df06b347f86776c8ac87415b8002a5942a35
Parents: 2eedc00
Author: jiangxingbo <jiangxin...@meituan.com>
Authored: Mon Aug 1 23:08:06 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Mon Aug 1 23:08:06 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/test/TestHive.scala   | 42 +++++++++-----------
 .../sql/hive/execution/HiveQuerySuite.scala     |  2 +-
 .../spark/sql/sources/BucketedReadSuite.scala   |  2 +-
 3 files changed, 21 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 7f89204..fbacd59 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -54,6 +54,7 @@ object TestHive
         .set("spark.sql.test", "")
         .set("spark.sql.hive.metastore.barrierPrefixes",
           "org.apache.spark.sql.hive.execution.PairSerDe")
+        .set("spark.sql.warehouse.dir", 
TestHiveContext.makeWarehouseDir().toURI.getPath)
         // SPARK-8910
         .set("spark.ui.enabled", "false")))
 
@@ -111,7 +112,6 @@ class TestHiveContext(
  * A [[SparkSession]] used in [[TestHiveContext]].
  *
  * @param sc SparkContext
- * @param warehousePath path to the Hive warehouse directory
  * @param scratchDirPath scratch directory used by Hive's metastore client
  * @param metastoreTemporaryConf configuration options for Hive's metastore
  * @param existingSharedState optional [[TestHiveSharedState]]
@@ -120,23 +120,15 @@ class TestHiveContext(
  */
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
-    val warehousePath: File,
     scratchDirPath: File,
     metastoreTemporaryConf: Map[String, String],
     @transient private val existingSharedState: Option[TestHiveSharedState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
-  // TODO: We need to set the temp warehouse path to sc's conf.
-  // Right now, In SparkSession, we will set the warehouse path to the default one
-  // instead of the temp one. Then, we override the setting in TestHiveSharedState
-  // when we creating metadataHive. This flow is not easy to follow and can introduce
-  // confusion when a developer is debugging an issue. We need to refactor this part
-  // to just set the temp warehouse path in sc's conf.
   def this(sc: SparkContext, loadTestTables: Boolean) {
     this(
       sc,
-      Utils.createTempDir(namePrefix = "warehouse"),
       TestHiveContext.makeScratchDir(),
       HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false),
       None,
@@ -151,16 +143,16 @@ private[hive] class TestHiveSparkSession(
   @transient
   override lazy val sharedState: TestHiveSharedState = {
     existingSharedState.getOrElse(
-      new TestHiveSharedState(sc, warehousePath, scratchDirPath, metastoreTemporaryConf))
+      new TestHiveSharedState(sc, scratchDirPath, metastoreTemporaryConf))
   }
 
   @transient
   override lazy val sessionState: TestHiveSessionState =
-    new TestHiveSessionState(self, warehousePath)
+    new TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(
-      sc, warehousePath, scratchDirPath, metastoreTemporaryConf, Some(sharedState), loadTestTables)
+      sc, scratchDirPath, metastoreTemporaryConf, Some(sharedState), loadTestTables)
   }
 
   private var cacheTables: Boolean = false
@@ -199,6 +191,12 @@ private[hive] class TestHiveSparkSession(
     new File(Thread.currentThread().getContextClassLoader.getResource(path).getFile)
   }
 
+  def getWarehousePath(): String = {
+    val tempConf = new SQLConf
+    sc.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) }
+    tempConf.warehousePath
+  }
+
   val describedTable = "DESCRIBE (\\w+)".r
 
   case class TestTable(name: String, commands: (() => Unit)*)
@@ -509,21 +507,19 @@ private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry {
 
 private[hive] class TestHiveSharedState(
     sc: SparkContext,
-    warehousePath: File,
     scratchDirPath: File,
     metastoreTemporaryConf: Map[String, String])
   extends HiveSharedState(sc) {
 
   override lazy val metadataHive: HiveClient = {
     TestHiveContext.newClientForMetadata(
-      sc.conf, sc.hadoopConfiguration, warehousePath, scratchDirPath, metastoreTemporaryConf)
+      sc.conf, sc.hadoopConfiguration, scratchDirPath, metastoreTemporaryConf)
   }
 }
 
 
 private[hive] class TestHiveSessionState(
-    sparkSession: TestHiveSparkSession,
-    warehousePath: File)
+    sparkSession: TestHiveSparkSession)
   extends HiveSessionState(sparkSession) { self =>
 
   override lazy val conf: SQLConf = {
@@ -533,7 +529,6 @@ private[hive] class TestHiveSessionState(
       override def clear(): Unit = {
         super.clear()
        TestHiveContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
-        setConfString("hive.metastore.warehouse.dir", self.warehousePath.toURI.toString)
       }
     }
   }
@@ -571,13 +566,12 @@ private[hive] object TestHiveContext {
   def newClientForMetadata(
       conf: SparkConf,
       hadoopConf: Configuration,
-      warehousePath: File,
       scratchDirPath: File,
       metastoreTemporaryConf: Map[String, String]): HiveClient = {
     HiveUtils.newClientForMetadata(
       conf,
       hadoopConf,
-      hiveClientConfigurations(hadoopConf, warehousePath, scratchDirPath, metastoreTemporaryConf))
+      hiveClientConfigurations(hadoopConf, scratchDirPath, metastoreTemporaryConf))
   }
 
   /**
@@ -585,18 +579,20 @@ private[hive] object TestHiveContext {
    */
   def hiveClientConfigurations(
       hadoopConf: Configuration,
-      warehousePath: File,
       scratchDirPath: File,
       metastoreTemporaryConf: Map[String, String]): Map[String, String] = {
     HiveUtils.hiveClientConfigurations(hadoopConf) ++ metastoreTemporaryConf ++ Map(
-      // Override WAREHOUSE_PATH and METASTOREWAREHOUSE to use the given path.
-      SQLConf.WAREHOUSE_PATH.key -> warehousePath.toURI.toString,
-      ConfVars.METASTOREWAREHOUSE.varname -> warehousePath.toURI.toString,
       ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname -> "true",
       ConfVars.SCRATCHDIR.varname -> scratchDirPath.toURI.toString,
       ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY.varname -> "1")
   }
 
+  def makeWarehouseDir(): File = {
+    val warehouseDir = Utils.createTempDir(namePrefix = "warehouse")
+    warehouseDir.delete()
+    warehouseDir
+  }
+
   def makeScratchDir(): File = {
     val scratchDir = Utils.createTempDir(namePrefix = "scratch")
     scratchDir.delete()

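Note: with warehousePath gone from the constructors, the new getWarehousePath() above recovers the effective location by copying every entry of sc's conf into a scratch SQLConf and reading back its warehousePath. A rough usage sketch (assuming a TestHiveSparkSession in scope as sparkSession):

    import java.io.File

    // Resolves to whatever spark.sql.warehouse.dir was set to in sc's conf.
    val tableDir = new File(sparkSession.getWarehousePath, "dynamic_part_table")
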
http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 31283b9..6785167 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -964,7 +964,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
         .mkString("/")
 
       // Loads partition data to a temporary table to verify contents
-      val path = s"${sparkSession.warehousePath}/dynamic_part_table/$partFolder/part-00000"
+      val path = s"${sparkSession.getWarehousePath}/dynamic_part_table/$partFolder/part-00000"
 
       sql("DROP TABLE IF EXISTS dp_verify")
       sql("CREATE TABLE dp_verify(intcol INT)")

http://git-wip-us.apache.org/repos/asf/spark/blob/5184df06/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
index e461490..8d161a3 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala
@@ -353,7 +353,7 @@ class BucketedReadSuite extends QueryTest with SQLTestUtils with TestHiveSinglet
     withTable("bucketed_table") {
       df1.write.format("parquet").bucketBy(8, 
"i").saveAsTable("bucketed_table")
       val tableDir = new File(hiveContext
-        .sparkSession.warehousePath, "bucketed_table")
+        .sparkSession.getWarehousePath, "bucketed_table")
       Utils.deleteRecursively(tableDir)
       df1.write.parquet(tableDir.getAbsolutePath)
 

