hudi-bot opened a new issue, #16313:
URL: https://github.com/apache/hudi/issues/16313
When a global index is used and a record's partition value changes, the write
fails with the error: The requested schema is not compatible with the file
schema...
Why not use the schema returned by
org.apache.hudi.common.table.TableSchemaResolver#getTableAvroSchemaInternal to
read Hudi data?
CREATE TABLE if not exists unisql.hudi_ut_time_traval
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING)
USING HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
insert into unisql.hudi_ut_time_traval
select 1 as id, 1 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0'
as timestamp) as birthDate, cast('2023-10-01' as date) as inc_day;
select * from hudi_ut_time_traval;
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                                                       |id |version|name |birthDate          |inc_day   |
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
|20231122100234339  |20231122100234339_0_0|1                 |inc_day=2023-10-01    |8a510742-c060-4d12-898e-70bbd122f2e3-0_0-19-16_20231122100234339.parquet|1  |1      |str_1|2023-01-01 12:12:12|2023-10-01|
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+---+-------+-----+-------------------+----------+
merge into hudi_ut_time_traval t using (
select 1 as id, 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0'
as timestamp) as birthDate, cast('2023-10-02' as date) as inc_day
) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN INSERT
*
Caused by: org.apache.parquet.io.ParquetDecodingException: The requested
schema is not compatible with the file schema. incompatible types: required
int32 id != optional int32 id
at
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.incompatibleSchema(ColumnIOFactory.java:101)
at
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visitChildren(ColumnIOFactory.java:81)
at
org.apache.parquet.io.ColumnIOFactory$ColumnIOCreatorVisitor.visit(ColumnIOFactory.java:57)
at org.apache.parquet.schema.MessageType.accept(MessageType.java:55)
at
org.apache.parquet.io.ColumnIOFactory.getColumnIO(ColumnIOFactory.java:162)
at
org.apache.parquet.hadoop.InternalParquetRecordReader.checkRead(InternalParquetRecordReader.java:135)
at
org.apache.parquet.hadoop.InternalParquetRecordReader.nextKeyValue(InternalParquetRecordReader.java:225);
parquet schema:
{
"type" : "record",
"name" : "hudi_ut_time_traval_record",
"namespace" : "hoodie.hudi_ut_time_traval",
"fields" : [ {
"name" : "_hoodie_commit_time",
"type" : [ "null", "string" ],
"doc" : "",
"default" : null
}, {
"name" : "_hoodie_commit_seqno",
"type" : [ "null", "string" ],
"doc" : "",
"default" : null
}, {
"name" : "_hoodie_record_key",
"type" : [ "null", "string" ],
"doc" : "",
"default" : null
}, {
"name" : "_hoodie_partition_path",
"type" : [ "null", "string" ],
"doc" : "",
"default" : null
}, {
"name" : "_hoodie_file_name",
"type" : [ "null", "string" ],
"doc" : "",
"default" : null
}, {
"name" : "id",
"type" : [ "null", "int" ],
"default" : null
}, {
"name" : "version",
"type" : [ "null", "int" ],
"default" : null
}, {
"name" : "name",
"type" : [ "null", "string" ],
"default" : null
}, {
"name" : "birthDate",
"type" : [ "null", {
"type" : "long",
"logicalType" : "timestamp-micros"
} ],
"default" : null
}, {
"name" : "inc_day",
"type" : [ "null", "string" ],
"default" : null
} ]
}
org.apache.hudi.io.HoodieMergedReadHandle#readerSchema:
{"type":"record","name":"hudi_ut_time_traval_record","namespace":"hoodie.hudi_ut_time_traval","fields":[{"name":"id","type":"int"},{"name":"version","type":"int"},{"name":"name","type":"string"},{"name":"birthDate","type":["null",{"type":"long","logicalType":"timestamp-micros"}],"default":null},{"name":"inc_day","type":["null",{"type":"int","logicalType":"date"}],"default":null}]}
## JIRA info
- Link: https://issues.apache.org/jira/browse/HUDI-7131
- Type: Bug
- Affects version(s):
- 0.14.0
- Fix version(s):
- 1.1.0
---
## Comments
22/Nov/23 02:47;danny0405;It looks like a known issue: we do not support
schema evolution on partition fields yet.;;;
---
22/Nov/23 03:08;loukey_j;The schema of the table has not changed, only the
partition value of the data has changed.;;;
---
22/Nov/23 08:14;loukey_j;Sorry, I didn't notice that I had cast inc_day to the
date type. I later corrected the SQL and still got the same error. Execute the
following SQL statements to reproduce. The root cause of the problem is that
hoodieWriteConfig.getSchema() is incompatible with the schema of the Hudi table.
1. CREATE TABLE if not exists hudi_ut_time_traval
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING)
USING HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (type='cow', primaryKey='id');
2. merge into hudi_ut_time_traval t using (select 1 as id, 2 as version,
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-01' as inc_day) s on t.id=s.id when matched THEN
UPDATE SET * WHEN NOT MATCHED THEN INSERT *
3. merge into hudi_ut_time_traval t using (select 1 as id, 2 as version,
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-02' as inc_day) s on t.id=s.id when matched THEN
UPDATE SET * WHEN NOT MATCHED THEN INSERT *;;;
---
27/Nov/23 06:28;loukey_j;[~xushiyan] please take a look ;;;
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]