Zhen Wang created SPARK-39997:
---------------------------------
Summary: ParquetSchemaConverter fails to match schema by id
Key: SPARK-39997
URL: https://issues.apache.org/jira/browse/SPARK-39997
Project: Spark
Issue Type: Bug
Components: SQL
Affects Versions: 3.3.0
Reporter: Zhen Wang
```
// Reproduction case: writes Parquet data whose array element struct has field "c0" (id 6),
// then reads it back with a schema in which that nested field is renamed to "c1" but keeps
// id 6. `withId` is assumed to build field metadata carrying the given Parquet field id
// (helper defined outside this snippet -- confirm against the enclosing suite).
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
  withTempDir { dir =>
    // now with nested schema/complex type
    // Read side: nested field renamed to "c1", same id (6) as the written "c0".
    val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6))
    val readSchema =
      new StructType()
        .add("c", ArrayType(innerTypeRenamed), true, withId(3))
        .add("e", IntegerType, true, withId(5))
    // Write side: nested field "c0" with id 6; "randomName" is added without any id metadata.
    val innerType = new StructType().add("c0", IntegerType, true, withId(6))
    val writeSchema =
      new StructType()
        .add("c", ArrayType(innerType), true, withId(3))
        .add("randomName", StringType, true)
    val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))
    spark.createDataFrame(writeData.asJava, writeSchema)
      .write.mode("overwrite").parquet(dir.getCanonicalPath)
    withAllParquetReaders {
      checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
        // Expected: the nested value 100 is still read through the renamed field (id 6),
        // while "e" (id 5) has no counterpart in the written schema and comes back as null.
        Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
    }
  }
}
```
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]