Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/22611#discussion_r222169616 --- Diff: external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala --- @@ -342,6 +342,53 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils { } } + private def createDummyCorruptFile(dir: File): Unit = { + FileUtils.forceMkdir(dir) + val corruptFile = new File(dir, "corrupt.avro") + val writer = new BufferedWriter(new FileWriter(corruptFile)) + writer.write("corrupt") + writer.close() + } + + test("Ignore corrupt Avro file if flag IGNORE_CORRUPT_FILES enabled") { + withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "true") { + withTempPath { dir => + createDummyCorruptFile(dir) + val message = intercept[FileNotFoundException] { + spark.read.format("avro").load(dir.getAbsolutePath).schema + }.getMessage + assert(message.contains("No Avro files found.")) + + val srcFile = new File("src/test/resources/episodes.avro") + val destFile = new File(dir, "episodes.avro") + FileUtils.copyFile(srcFile, destFile) + + val df = spark.read.format("avro").load(srcFile.getAbsolutePath) + val schema = df.schema + val result = df.collect() + // Schema inference picks random readable sample file. + // Here we use a loop to eliminate randomness. + (1 to 5).foreach { _ => + assert(spark.read.format("avro").load(dir.getAbsolutePath).schema == schema) + checkAnswer(spark.read.format("avro").load(dir.getAbsolutePath), result) + } + } + } + } + + test("Throws IOException on reading corrupt Avro file if flag IGNORE_CORRUPT_FILES disabled") { + withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "false") { + withTempPath { dir => + createDummyCorruptFile(dir) + val message = intercept[org.apache.spark.SparkException] { + spark.read.format("avro").load(dir.getAbsolutePath).schema --- End diff -- `.schema` probably wouldn't be needed.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org