Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21091#discussion_r182661319
--- Diff:
sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala ---
@@ -34,79 +34,81 @@ class QueryPartitionSuite extends QueryTest with
SQLTestUtils with TestHiveSingl
import spark.implicits._
test("SPARK-5068: query data when path doesn't exist") {
- withSQLConf((SQLConf.HIVE_VERIFY_PARTITION_PATH.key, "true")) {
- val testData = sparkContext.parallelize(
- (1 to 10).map(i => TestData(i, i.toString))).toDF()
- testData.createOrReplaceTempView("testData")
-
- val tmpDir = Files.createTempDir()
- // create the table for test
- sql(s"CREATE TABLE table_with_partition(key int,value string) " +
- s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
- sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1')
" +
- "SELECT key,value FROM testData")
- sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2')
" +
- "SELECT key,value FROM testData")
- sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3')
" +
- "SELECT key,value FROM testData")
- sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4')
" +
- "SELECT key,value FROM testData")
-
- // test for the exist path
- checkAnswer(sql("select key,value from table_with_partition"),
- testData.toDF.collect ++ testData.toDF.collect
- ++ testData.toDF.collect ++ testData.toDF.collect)
-
- // delete the path of one partition
- tmpDir.listFiles
- .find { f => f.isDirectory && f.getName().startsWith("ds=") }
- .foreach { f => Utils.deleteRecursively(f) }
-
- // test for after delete the path
- checkAnswer(sql("select key,value from table_with_partition"),
- testData.toDF.collect ++ testData.toDF.collect ++
testData.toDF.collect)
-
- sql("DROP TABLE IF EXISTS table_with_partition")
- sql("DROP TABLE IF EXISTS createAndInsertTest")
+ withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
+ withTempView("testData") {
+ withTable("table_with_partition", "createAndInsertTest") {
+ withTempDir { tmpDir =>
+ val testData = sparkContext.parallelize(
+ (1 to 10).map(i => TestData(i, i.toString))).toDF()
+ testData.createOrReplaceTempView("testData")
+
+ // create the table for test
+ sql(s"CREATE TABLE table_with_partition(key int,value string)
" +
+ s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
+ sql("INSERT OVERWRITE TABLE table_with_partition partition
(ds='1') " +
+ "SELECT key,value FROM testData")
+ sql("INSERT OVERWRITE TABLE table_with_partition partition
(ds='2') " +
+ "SELECT key,value FROM testData")
+ sql("INSERT OVERWRITE TABLE table_with_partition partition
(ds='3') " +
+ "SELECT key,value FROM testData")
+ sql("INSERT OVERWRITE TABLE table_with_partition partition
(ds='4') " +
+ "SELECT key,value FROM testData")
+
+ // test for the exist path
+ checkAnswer(sql("select key,value from table_with_partition"),
+ testData.union(testData).union(testData).union(testData))
+
+ // delete the path of one partition
+ tmpDir.listFiles
+ .find { f => f.isDirectory &&
f.getName().startsWith("ds=") }
+ .foreach { f => Utils.deleteRecursively(f) }
+
+ // test for after delete the path
+ checkAnswer(sql("select key,value from table_with_partition"),
+ testData.union(testData).union(testData))
+ }
+ }
+ }
}
}
test("Replace spark.sql.hive.verifyPartitionPath by
spark.files.ignoreMissingFiles") {
--- End diff --
These 2 tests are exactly the same except for the config. Can we extract a
common method for them?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]