Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/20953#discussion_r178441790
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
---
@@ -382,6 +384,59 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
}
}
+ // =======================================
+ // Tests for parquet schema mismatch error
+ // =======================================
+ def testSchemaMismatch(path: String, vectorizedReaderEnabled: Boolean):
SparkException = {
+ import testImplicits._
+
+ var e : SparkException = null
+ // Disable databricks' vectorized parquet reader and use open source
version.
+ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key ->
vectorizedReaderEnabled.toString) {
+ // Create two parquet files with different schemas in the same folder
+ Seq(("bcd", 2)).toDF("a",
"b").coalesce(1).write.mode("overwrite").parquet(s"$path/parquet")
+ Seq((1, "abc")).toDF("a",
"b").coalesce(1).write.mode("append").parquet(s"$path/parquet")
+
+ e = intercept[SparkException] {
+ spark.read.parquet(s"$path/parquet").collect()
+ }
+ }
+ return e
--- End diff --
Scala does not need `return` here: the last expression of a method is its result, so this line can simply be `e`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]