spark git commit: [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources

lixiao Sat, 03 Feb 2018 00:04:27 -0800

Repository: spark
Updated Branches:
  refs/heads/branch-2.3 1bcb3728d -> 4de206182



[SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all 
file-based data sources

## What changes were proposed in this pull request?

Like Parquet, all file-based data sources handle 
`spark.sql.files.ignoreMissingFiles` correctly. We should have test 
coverage for feature parity and to prevent future accidental 
regressions in any of these data sources.

## How was this patch tested?

Passes Jenkins with the newly added test case.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #20479 from dongjoon-hyun/SPARK-23305.

(cherry picked from commit 522e0b1866a0298669c83de5a47ba380dc0b7c84)
Signed-off-by: gatorsmile <gatorsm...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4de20618
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4de20618
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4de20618

Branch: refs/heads/branch-2.3
Commit: 4de206182c8a1f76e1e5e6b597c4b3890e2ca255
Parents: 1bcb372
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Sat Feb 3 00:04:00 2018 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Sat Feb 3 00:04:08 2018 -0800

----------------------------------------------------------------------
 .../spark/sql/FileBasedDataSourceSuite.scala    | 37 ++++++++++++++++++++
 .../datasources/parquet/ParquetQuerySuite.scala | 33 -----------------
 2 files changed, 37 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4de20618/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
index c272c99..640d6b1 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala
@@ -17,6 +17,10 @@
 
 package org.apache.spark.sql
 
+import org.apache.hadoop.fs.Path
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 
 class FileBasedDataSourceSuite extends QueryTest with SharedSQLContext {
@@ -92,4 +96,37 @@ class FileBasedDataSourceSuite extends QueryTest with 
SharedSQLContext {
       }
     }
   }
+
+  allFileBasedDataSources.foreach { format =>
+    testQuietly(s"Enabling/disabling ignoreMissingFiles using $format") {
+      def testIgnoreMissingFiles(): Unit = {
+        withTempDir { dir =>
+          val basePath = dir.getCanonicalPath
+          Seq("0").toDF("a").write.format(format).save(new Path(basePath, 
"first").toString)
+          Seq("1").toDF("a").write.format(format).save(new Path(basePath, 
"second").toString)
+          val thirdPath = new Path(basePath, "third")
+          Seq("2").toDF("a").write.format(format).save(thirdPath.toString)
+          val df = spark.read.format(format).load(
+            new Path(basePath, "first").toString,
+            new Path(basePath, "second").toString,
+            new Path(basePath, "third").toString)
+
+          val fs = 
thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
+          assert(fs.delete(thirdPath, true))
+          checkAnswer(df, Seq(Row("0"), Row("1")))
+        }
+      }
+
+      withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
+        testIgnoreMissingFiles()
+      }
+
+      withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
+        val exception = intercept[SparkException] {
+          testIgnoreMissingFiles()
+        }
+        assert(exception.getMessage().contains("does not exist"))
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4de20618/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 6ad88ed..55b0f72 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -355,39 +355,6 @@ class ParquetQuerySuite extends QueryTest with ParquetTest 
with SharedSQLContext
     }
   }
 
-  testQuietly("Enabling/disabling ignoreMissingFiles") {
-    def testIgnoreMissingFiles(): Unit = {
-      withTempDir { dir =>
-        val basePath = dir.getCanonicalPath
-        spark.range(1).toDF("a").write.parquet(new Path(basePath, 
"first").toString)
-        spark.range(1, 2).toDF("a").write.parquet(new Path(basePath, 
"second").toString)
-        val thirdPath = new Path(basePath, "third")
-        spark.range(2, 3).toDF("a").write.parquet(thirdPath.toString)
-        val df = spark.read.parquet(
-          new Path(basePath, "first").toString,
-          new Path(basePath, "second").toString,
-          new Path(basePath, "third").toString)
-
-        val fs = 
thirdPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
-        fs.delete(thirdPath, true)
-        checkAnswer(
-          df,
-          Seq(Row(0), Row(1)))
-      }
-    }
-
-    withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "true") {
-      testIgnoreMissingFiles()
-    }
-
-    withSQLConf(SQLConf.IGNORE_MISSING_FILES.key -> "false") {
-      val exception = intercept[SparkException] {
-        testIgnoreMissingFiles()
-      }
-      assert(exception.getMessage().contains("does not exist"))
-    }
-  }
-
   /**
    * this is part of test 'Enabling/disabling ignoreCorruptFiles' but run in a 
loop
    * to increase the chance of failure


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-23305][SQL][TEST] Test `spark.sql.files.ignoreMissingFiles` for all file-based data sources

Reply via email to