[ 
https://issues.apache.org/jira/browse/HUDI-7131?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

loukey_j updated HUDI-7131:
---------------------------
    Affects Version/s: 0.14.0

> The requested schema is not compatible with the file schema
> -----------------------------------------------------------
>
>                 Key: HUDI-7131
>                 URL: https://issues.apache.org/jira/browse/HUDI-7131
>             Project: Apache Hudi
>          Issue Type: Bug
>    Affects Versions: 0.14.0
>            Reporter: loukey_j
>            Priority: Critical
>              Labels: core, merge, spark
>
> When a global index is used and a record's data partition changes, an error 
> is reported: The requested schema is not compatible with the file schema...
> Why not use the schema from 
> org.apache.hudi.common.table.TableSchemaResolver#getTableAvroSchemaInternal 
> to read the Hudi data?
>  
> CREATE TABLE if not exists unisql.hudi_ut_time_traval
> (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING 
> HUDI
> PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
> insert into unisql.hudi_ut_time_traval
> select 1 as id, 1 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' 
> as timestamp) as birthDate, cast('2023-10-01' as date) as inc_day;
> select * from hudi_ut_time_traval;
> +-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno 
> |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name |id 
> |version|name |birthDate |inc_day |
> +-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
> |20231122100234339 |20231122100234339_0_0|1 |inc_day=2023-10-01 
> |8a510742-c060-4d12-898e-70bbd122f2e3-0_0-19-16_20231122100234339.parquet|1 
> |1 |str_1|2023-01-01 12:12:12|2023-10-01|
> +-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
> merge into hudi_ut_time_traval t using (
> select 1 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' 
> as timestamp) as birthDate, cast('2023-10-02' as date) as inc_day
> ) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT *
> Caused by: org.apache.parquet.io.ParquetDecodingException: The requested 
> schema is not compatible with the file schema. incompatible types: required 
> int32 id != optional int32 id
> at 
> org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101)
> at 
> org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:81)
> at 
> org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57)
> at org.apache.parquet.schema.MessageType.accept(MessageType.java:55)
> at org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162)
> at 
> org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135)
> at 
> org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225);
> parquet schema:
> {
> "type" : "record",
> "name" : "hudi_ut_time_traval_record",
> "namespace" : "hoodie.hudi_ut_time_traval",
> "fields" : [ {
> "name" : "_hoodie_commit_time",
> "type" : [ "null", "string" ],
> "doc" : "",
> "default" : null
> }, {
> "name" : "_hoodie_commit_seqno",
> "type" : [ "null", "string" ],
> "doc" : "",
> "default" : null
> }, {
> "name" : "_hoodie_record_key",
> "type" : [ "null", "string" ],
> "doc" : "",
> "default" : null
> }, {
> "name" : "_hoodie_partition_path",
> "type" : [ "null", "string" ],
> "doc" : "",
> "default" : null
> }, {
> "name" : "_hoodie_file_name",
> "type" : [ "null", "string" ],
> "doc" : "",
> "default" : null
> }, {
> "name" : "id",
> "type" : [ "null", "int" ],
> "default" : null
> }, {
> "name" : "version",
> "type" : [ "null", "int" ],
> "default" : null
> }, {
> "name" : "name",
> "type" : [ "null", "string" ],
> "default" : null
> }, {
> "name" : "birthDate",
> "type" : [ "null", {
> "type" : "long",
> "logicalType" : "timestamp-micros"
> } ],
> "default" : null
> }, {
> "name" : "inc_day",
> "type" : [ "null", "string" ],
> "default" : null
> } ]
> }
> org.apache.hudi.io.HoodieMergedReadHandle#readerSchema:
> {"type":"record","name":"hudi_ut_time_traval_record","namespace":"hoodie.hudi_ut_time_traval","fields":[\{"name":"id","type":"int"},\{"name":"version","type":"int"},\{"name":"name","type":"string"},\{"name":"birthDate","type":["null",{"type":"long","logicalType":"timestamp-micros"}],"default":null},\{"name":"inc_day","type":["null",{"type":"int","logicalType":"date"}],"default":null}]}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to