hudi-bot opened a new issue, #16313:
URL: https://github.com/apache/hudi/issues/16313

   When using a global index and a record's partition value changes, an error 
is reported: The requested schema is not compatible with the file schema...
   
   Why not use the schema returned by 
org.apache.hudi.common.table.TableSchemaResolver#getTableAvroSchemaInternal to 
read the Hudi data?
   
    
   CREATE TABLE if not exists unisql.hudi_ut_time_traval
   (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) 
USING HUDI
   PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
   
   insert into unisql.hudi_ut_time_traval
   select 1 as id, 1 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' 
as timestamp) as birthDate, cast('2023-10-01' as date) as inc_day;
   
   select * from hudi_ut_time_traval;
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
   |_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name |id |version|name 
|birthDate |inc_day |
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
   |20231122100234339 |20231122100234339_0_0|1 |inc_day=2023-10-01 
|8a510742-c060-4d12-898e-70bbd122f2e3-0_0-19-16_20231122100234339.parquet|1 |1 
|str_1|2023-01-01 12:12:12|2023-10-01|
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
   
   merge into hudi_ut_time_traval t using (
   select 1 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' 
as timestamp) as birthDate, cast('2023-10-02' as date) as inc_day
   ) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT 
*
   
   Caused by: org.apache.parquet.io.ParquetDecodingException: The requested 
schema is not compatible with the file schema. incompatible types: required 
int32 id != optional int32 id
   at 
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101)
   at 
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:81)
   at 
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57)
   at org.apache.parquet.schema.MessageType.accept(MessageType.java:55)
   at 
org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162)
   at 
org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135)
   at 
org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225);
   
   parquet schema:
   {
   "type" : "record",
   "name" : "hudi_ut_time_traval_record",
   "namespace" : "hoodie.hudi_ut_time_traval",
   "fields" : [ {
   "name" : "_hoodie_commit_time",
   "type" : [ "null", "string" ],
   "doc" : "",
   "default" : null
   }, {
   "name" : "_hoodie_commit_seqno",
   "type" : [ "null", "string" ],
   "doc" : "",
   "default" : null
   }, {
   "name" : "_hoodie_record_key",
   "type" : [ "null", "string" ],
   "doc" : "",
   "default" : null
   }, {
   "name" : "_hoodie_partition_path",
   "type" : [ "null", "string" ],
   "doc" : "",
   "default" : null
   }, {
   "name" : "_hoodie_file_name",
   "type" : [ "null", "string" ],
   "doc" : "",
   "default" : null
   }, {
   "name" : "id",
   "type" : [ "null", "int" ],
   "default" : null
   }, {
   "name" : "version",
   "type" : [ "null", "int" ],
   "default" : null
   }, {
   "name" : "name",
   "type" : [ "null", "string" ],
   "default" : null
   }, {
   "name" : "birthDate",
   "type" : [ "null", {
   "type" : "long",
   "logicalType" : "timestamp-micros"
   } ],
   "default" : null
   }, {
   "name" : "inc_day",
   "type" : [ "null", "string" ],
   "default" : null
   } ]
   }
   
   org.apache.hudi.io.HoodieMergedReadHandle#readerSchema:
   
   
{"type":"record","name":"hudi_ut_time_traval_record","namespace":"hoodie.hudi_ut_time_traval","fields":[{"name":"id","type":"int"},{"name":"version","type":"int"},{"name":"name","type":"string"},{"name":"birthDate","type":["null",{"type":"long","logicalType":"timestamp-micros"}],"default":null},{"name":"inc_day","type":["null",{"type":"int","logicalType":"date"}],"default":null}]}
    
   
   ## JIRA info
   
   - Link: https://issues.apache.org/jira/browse/HUDI-7131
   - Type: Bug
   - Affects version(s):
     - 0.14.0
   - Fix version(s):
     - 1.1.0
   
   
   ---
   
   
   ## Comments
   
   22/Nov/23 02:47;danny0405;It looks like a known issue: we do not support 
schema evolution on partition fields yet.;;;
   
   ---
   
   22/Nov/23 03:08;loukey_j;The schema of the table has not changed, only the 
partition value of the data has changed.;;;
   
   ---
   
   22/Nov/23 08:14;loukey_j;Sorry, I didn't notice that I had converted inc_day 
to date type. I later corrected the SQL and got the same error. Execute the 
following SQL statements to reproduce. The root cause of the problem is that 
hoodieWriteConfig.getSchema() is incompatible with the schema of the Hudi table.
   
   1. CREATE TABLE if not exists hudi_ut_time_traval
   (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) 
USING HUDI
   PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
   
   2. merge into hudi_ut_time_traval t using (select 1 as id, 2 as version, 
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
'2023-10-01' as inc_day) s on t.id=s.id when matched THEN 
UPDATE SET * WHEN NOT MATCHED THEN INSERT *
   
   3. merge into hudi_ut_time_traval t using (select 1 as id, 2 as version, 
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
'2023-10-02' as inc_day) s on t.id=s.id when matched THEN 
UPDATE SET * WHEN NOT MATCHED THEN INSERT *;;;
   
   ---
   
   27/Nov/23 06:28;loukey_j;[~xushiyan] please take a look ;;;


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to