[
https://issues.apache.org/jira/browse/SPARK-39997?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Zhen Wang updated SPARK-39997:
------------------------------
Description:
{code:scala}
test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true,
withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)),
"more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
{code}
was:
```
test("SPARK-38094: absence of field ids: reading nested schema struct field
renamed") {
withTempDir { dir =>
// now with nested schema/complex type
val innerTypeRenamed = new StructType().add("c1", IntegerType, true,
withId(6));
val readSchema =
new StructType()
.add("c", ArrayType(innerTypeRenamed), true, withId(3))
.add("e", IntegerType, true, withId(5))
val innerType = new StructType().add("c0", IntegerType, true, withId(6))
val writeSchema =
new StructType()
.add("c", ArrayType(innerType), true, withId(3))
.add("randomName", StringType, true)
val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)),
"more"))
spark.createDataFrame(writeData.asJava, writeSchema)
.write.mode("overwrite").parquet(dir.getCanonicalPath)
withAllParquetReaders {
checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
// a, b, c, d all couldn't be found
Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
}
}
}
```
> ParquetSchemaConverter fails to match schema by id
> --------------------------------------------------
>
> Key: SPARK-39997
> URL: https://issues.apache.org/jira/browse/SPARK-39997
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.3.0
> Reporter: Zhen Wang
> Priority: Major
>
> {code:scala}
> test("SPARK-38094: absence of field ids: reading nested schema struct field
> renamed") {
> withTempDir { dir =>
> // now with nested schema/complex type
> val innerTypeRenamed = new StructType().add("c1", IntegerType, true,
> withId(6));
> val readSchema =
> new StructType()
> .add("c", ArrayType(innerTypeRenamed), true, withId(3))
> .add("e", IntegerType, true, withId(5))
> val innerType = new StructType().add("c0", IntegerType, true, withId(6))
> val writeSchema =
> new StructType()
> .add("c", ArrayType(innerType), true, withId(3))
> .add("randomName", StringType, true)
> val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)),
> "more"))
> spark.createDataFrame(writeData.asJava, writeSchema)
> .write.mode("overwrite").parquet(dir.getCanonicalPath)
> withAllParquetReaders {
>
> checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
> // a, b, c, d all couldn't be found
> Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
> }
> }
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]