Github user lindblombr commented on a diff in the pull request:
https://github.com/apache/spark/pull/21847#discussion_r205568259
--- Diff:
external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala ---
@@ -40,12 +41,40 @@ import org.apache.spark.sql.types._
class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
val episodesAvro = testFile("episodes.avro")
val testAvro = testFile("test.avro")
+ val messyAvro = testFile("messy.avro")
+ val multiRecordTypeUnionAvro = testFile("multirecordtypeunion.avro")
+ val episodesSchemaFile = testFile("episodes.avsc")
+ val testSchemaFile = testFile("test.avsc")
+ val messySchemaFile = testFile("messy.avsc")
+ val multiRecordTypeUnionSchemaFile =
testFile("multirecordtypeunion.avsc")
override protected def beforeAll(): Unit = {
super.beforeAll()
spark.conf.set("spark.sql.files.maxPartitionBytes", 1024)
}
+ def forceSchemaCheck(spark: SparkSession, inputPath: String, schemaFile:
String): Unit = {
+ withTempPath { tempDir =>
+ val df = spark.read.format("avro").load(inputPath)
+
+ val tempSaveDir1 = s"$tempDir/test1/"
+ val tempSaveDir2 = s"$tempDir/test2/"
+
+ df.write.format("avro").save(tempSaveDir1)
+
+ val newDf = spark.read.format("avro").load(tempSaveDir1)
+ checkAnswer(df, newDf)
--- End diff --
I changed from `forceSchemaCheck` to `checkSpecifySchemaOnWrite`. I think
that's a bit clearer.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]