[
https://issues.apache.org/jira/browse/HUDI-7134?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
loukey_j updated HUDI-7134:
---------------------------
Description:
You can reproduce the problem by following the steps below. The value of add1
in step 7 is not as expected.
1、CREATE TABLE if not exists hudi_ut_schema_evolution
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING
HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true',
type='cow', primaryKey='id')
2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version,
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN
NOT MATCHED THEN INSERT *
3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);
4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as
birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE
SET * WHEN NOT MATCHED THEN INSERT *
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;
6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name,
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as
inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN
INSERT *;
7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name
|add1|id |version|name |birthDate
|inc_day |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030 |20231122164141030_0_0|1
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413 |20231122165045413_0_1|2
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036 |20231122165413036_0_2|3
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
`_hoodie_commit_time` STRING COMMENT '',
`_hoodie_commit_seqno` STRING COMMENT '',
`_hoodie_record_key` STRING COMMENT '',
`_hoodie_partition_path` STRING COMMENT '',
`_hoodie_file_name` STRING COMMENT '',
{color:red}`add1` STRING,
`id` INT,{color}
`version` INT,
`name` STRING,
`birthDate` TIMESTAMP,
`inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
'hoodie.query.as.ro.table' = 'false',
'last_commit_completion_time_sync' = '20231122171640801',
'last_commit_time_sync' = '20231122171627218',
'primaryKey' = 'id',
'type' = 'cow')
was:
{code:java}
1、CREATE TABLE if not exists hudi_ut_schema_evolution
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING
HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true',
type='cow', primaryKey='id')
2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version,
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN
NOT MATCHED THEN INSERT *
3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);
4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as
birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE
SET * WHEN NOT MATCHED THEN INSERT *
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;
6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name,
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as
inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN
INSERT *;
7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name
|add1|id |version|name |birthDate
|inc_day |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030 |20231122164141030_0_0|1
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413 |20231122165045413_0_1|2
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036 |20231122165413036_0_2|3
|inc_day=2023-10-02
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
|2 |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
`_hoodie_commit_time` STRING COMMENT '',
`_hoodie_commit_seqno` STRING COMMENT '',
`_hoodie_record_key` STRING COMMENT '',
`_hoodie_partition_path` STRING COMMENT '',
`_hoodie_file_name` STRING COMMENT '',
{color:red}`add1` STRING,
`id` INT,{color}
`version` INT,
`name` STRING,
`birthDate` TIMESTAMP,
`inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
'hoodie.query.as.ro.table' = 'false',
'last_commit_completion_time_sync' = '20231122171640801',
'last_commit_time_sync' = '20231122171627218',
'primaryKey' = 'id',
'type' = 'cow')
{code}
> After deleting the field and re-executing the merge, the result is not as
> expected.
> -----------------------------------------------------------------------------------
>
> Key: HUDI-7134
> URL: https://issues.apache.org/jira/browse/HUDI-7134
> Project: Apache Hudi
> Issue Type: Bug
> Components: spark
> Affects Versions: 0.14.0
> Environment: hudi 0.14 spark 3.2.1
> Reporter: loukey_j
> Priority: Major
> Labels: schema-evolution
>
> You can reproduce the problem by following the steps below. The value of add1
> in step 7 is not as expected.
> 1、CREATE TABLE if not exists hudi_ut_schema_evolution
> (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING
> HUDI
> PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true',
> type='cow', primaryKey='id')
> 2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version,
> 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
> '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN
> NOT MATCHED THEN INSERT *
> 3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);
> 4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id,
> 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as
> birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE
> SET * WHEN NOT MATCHED THEN INSERT *
>
> 5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;
> 6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as
> name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02'
> as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED
> THEN INSERT *;
> 7、select * from hudi_ut_schema_evolution;
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno
> |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name
> |add1|id |version|name |birthDate
> |inc_day |
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> |20231122164141030 |20231122164141030_0_0|1
> |inc_day=2023-10-02
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
> |2 |str_1|2023-01-01 12:12:12|2023-10-02|
> |20231122165045413 |20231122165045413_0_1|2
> |inc_day=2023-10-02
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
> |2 |str_1|2023-01-01 12:12:12|2023-10-02|
> |20231122165413036 |20231122165413036_0_2|3
> |inc_day=2023-10-02
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
> |2 |str_1|2023-01-01 12:12:12|2023-10-02|
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> 8、show create table hudi_ut_schema_evolution;
> CREATE TABLE unisql.hudi_ut_schema_evolution (
> `_hoodie_commit_time` STRING COMMENT '',
> `_hoodie_commit_seqno` STRING COMMENT '',
> `_hoodie_record_key` STRING COMMENT '',
> `_hoodie_partition_path` STRING COMMENT '',
> `_hoodie_file_name` STRING COMMENT '',
> {color:red}`add1` STRING,
> `id` INT,{color}
> `version` INT,
> `name` STRING,
> `birthDate` TIMESTAMP,
> `inc_day` STRING)
> PARTITIONED BY (inc_day)
> TBLPROPERTIES(
> 'hoodie.query.as.ro.table' = 'false',
> 'last_commit_completion_time_sync' = '20231122171640801',
> 'last_commit_time_sync' = '20231122171627218',
> 'primaryKey' = 'id',
> 'type' = 'cow')
--
This message was sent by Atlassian Jira
(v8.20.10#820010)