This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 5664403 [SPARK-38094][SQL][FOLLOWUP] Fix exception message and add a
test case
5664403 is described below
commit 5664403bb960b91e2f0fa87f2630b96e4c124701
Author: jackierwzhang <[email protected]>
AuthorDate: Tue Mar 1 19:44:02 2022 -0800
[SPARK-38094][SQL][FOLLOWUP] Fix exception message and add a test case
### What changes were proposed in this pull request?
Minor follow-ups on https://github.com/apache/spark/pull/35385:
1. Added a nested schema test
2. Fixed an error message
### Why are the changes needed?
Better observability.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing test
Closes #35700 from jackierwzhang/SPARK-38094-minor.
Authored-by: jackierwzhang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../datasources/parquet/ParquetReadSupport.scala | 2 +-
.../parquet/ParquetFieldIdIOSuite.scala | 31 +++++++++++++++++++++-
2 files changed, 31 insertions(+), 2 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
index 97e691f..69684f9 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
@@ -140,7 +140,7 @@ object ParquetReadSupport extends Logging {
"Spark read schema expects field Ids, " +
"but Parquet file schema doesn't contain any field Ids.\n" +
"Please remove the field ids from Spark schema or ignore missing ids
by " +
- "setting `spark.sql.parquet.fieldId.ignoreMissing = true`\n" +
+ s"setting `${SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID.key} = true`\n"
+
s"""
|Spark read schema:
|${catalystRequestedSchema.prettyJson}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
index ff0bb2f..5e01d3f 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFieldIdIOSuite.scala
@@ -23,7 +23,7 @@ import org.apache.spark.SparkException
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SharedSparkSession
-import org.apache.spark.sql.types.{IntegerType, Metadata, MetadataBuilder,
StringType, StructType}
+import org.apache.spark.sql.types.{ArrayType, IntegerType, MapType, Metadata,
MetadataBuilder, StringType, StructType}
class ParquetFieldIdIOSuite extends QueryTest with ParquetTest with
SharedSparkSession {
@@ -107,6 +107,35 @@ class ParquetFieldIdIOSuite extends QueryTest with
ParquetTest with SharedSparkS
}
}
+ test("SPARK-38094: absence of field ids: reading nested schema") {
+ withTempDir { dir =>
+ // now with nested schema/complex type
+ val readSchema =
+ new StructType()
+ .add("a", IntegerType, true, withId(1))
+ .add("b", ArrayType(StringType), true, withId(2))
+ .add("c", new StructType().add("c1", IntegerType, true, withId(6)),
true, withId(3))
+ .add("d", MapType(StringType, StringType), true, withId(4))
+ .add("e", IntegerType, true, withId(5))
+
+ val writeSchema =
+ new StructType()
+ .add("a", IntegerType, true, withId(5))
+ .add("randomName", StringType, true)
+
+ val writeData = Seq(Row(100, "text"), Row(200, "more"))
+
+ spark.createDataFrame(writeData.asJava, writeSchema)
+ .write.mode("overwrite").parquet(dir.getCanonicalPath)
+
+ withAllParquetReaders {
+
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
+ // a, b, c, d all couldn't be found
+ Row(null, null, null, null, 100) :: Row(null, null, null, null, 200)
:: Nil)
+ }
+ }
+ }
+
test("multiple id matches") {
withTempDir { dir =>
val readSchema =
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]