[jira] [Updated] (SPARK-39997) ParquetSchemaConverter fails match schema by id

Zhen Wang (Jira) Sat, 06 Aug 2022 06:33:08 -0700


     [ 
https://issues.apache.org/jira/browse/SPARK-39997?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


Zhen Wang updated SPARK-39997:
------------------------------
    Description: 

{code:scala}
  // Reproduction for matching a nested struct member by field id: the written file and the
  // read schema use the same ids (3 for the array column, 6 for the struct member), but the
  // struct member is named "c0" on write and "c1" on read.
  // NOTE(review): `withId` and `withAllParquetReaders` come from the enclosing suite;
  // presumably they attach field-id metadata and run the body once per Parquet reader.
  test("SPARK-38094: absence of field ids: reading nested schema struct field renamed") {
    withTempDir { dir =>
      // Read side: array-of-struct column "c" (id 3) whose member is "c1" (id 6),
      // plus an int column "e" (id 5).
      val innerTypeRenamed = new StructType().add("c1", IntegerType, true, withId(6))
      val readSchema =
        new StructType()
          .add("c", ArrayType(innerTypeRenamed), true, withId(3))
          .add("e", IntegerType, true, withId(5))

      // Write side: same ids for "c" and its member, but the member is called "c0";
      // "randomName" is written without any id.
      val innerType = new StructType().add("c0", IntegerType, true, withId(6))
      val writeSchema =
        new StructType()
          .add("c", ArrayType(innerType), true, withId(3))
          .add("randomName", StringType, true)

      val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), "more"))

      spark.createDataFrame(writeData.asJava, writeSchema)
        .write.mode("overwrite").parquet(dir.getCanonicalPath)

      withAllParquetReaders {
        checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
          // "c" is expected back intact, its member resolved through id 6 despite the
          // rename; "e" (id 5) was never written, so it is expected to be null.
          Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
      }
    }
  }
{code}


  was:
```
  // Reproduction: write Parquet data under one set of field names, then read it back with a
  // schema whose nested struct member carries the same id (6) but a different name.
  // NOTE(review): the test-name literal and two argument lists below are hard-wrapped by the
  // mail formatter; rejoin those lines before pasting this into a suite.
  test("SPARK-38094: absence of field ids: reading nested schema struct field 
renamed") {
    withTempDir { dir =>
      // Read schema: array-of-struct column "c" (id 3) with member "c1" (id 6), and "e" (id 5).

      val innerTypeRenamed = new StructType().add("c1", IntegerType, true, 
withId(6));
      val readSchema =
        new StructType()
          .add("c", ArrayType(innerTypeRenamed), true, withId(3))
          .add("e", IntegerType, true, withId(5))


      // Write schema: same ids for "c" and its member, but the member is named "c0";
      // "randomName" is added without an id.
      val innerType = new StructType().add("c0", IntegerType, true, withId(6))
      val writeSchema =
        new StructType()
          .add("c", ArrayType(innerType), true, withId(3))
          .add("randomName", StringType, true)

      val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), 
"more"))

      spark.createDataFrame(writeData.asJava, writeSchema)
        .write.mode("overwrite").parquet(dir.getCanonicalPath)

      // NOTE(review): withAllParquetReaders is defined elsewhere; presumably it runs the
      // body once per Parquet reader implementation.
      withAllParquetReaders {
        checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
          // Expected rows: "c" read back as written, "e" (id 5, never written) null.
          // NOTE(review): the earlier wording here mentioned fields a, b, c, d, none of
          // which exist in this test.
          Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
      }
    }
  }
```


> ParquetSchemaConverter fails match schema by id
> -----------------------------------------------
>
>                 Key: SPARK-39997
>                 URL: https://issues.apache.org/jira/browse/SPARK-39997
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.3.0
>            Reporter: Zhen Wang
>            Priority: Major
>
> {code:scala}
>   test("SPARK-38094: absence of field ids: reading nested schema struct field 
> renamed") {
>     withTempDir { dir =>
>       // now with nested schema/complex type
>       val innerTypeRenamed = new StructType().add("c1", IntegerType, true, 
> withId(6));
>       val readSchema =
>         new StructType()
>           .add("c", ArrayType(innerTypeRenamed), true, withId(3))
>           .add("e", IntegerType, true, withId(5))
>       val innerType = new StructType().add("c0", IntegerType, true, withId(6))
>       val writeSchema =
>         new StructType()
>           .add("c", ArrayType(innerType), true, withId(3))
>           .add("randomName", StringType, true)
>       val writeData = Seq(Row(Seq(Row(100)), "text"), Row(Seq(Row(100)), 
> "more"))
>       spark.createDataFrame(writeData.asJava, writeSchema)
>         .write.mode("overwrite").parquet(dir.getCanonicalPath)
>       withAllParquetReaders {
>         
> checkAnswer(spark.read.schema(readSchema).parquet(dir.getCanonicalPath),
>           // a, b, c, d all couldn't be found
>           Row(Seq(Row(100)), null) :: Row(Seq(Row(100)), null) :: Nil)
>       }
>     }
>   }
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[jira] [Updated] (SPARK-39997) ParquetSchemaConverter fails match schema by id

Reply via email to