loukey_j created HUDI-7134:
------------------------------
Summary: After deleting the field and re-executing the merge, the
result is not as expected.
Key: HUDI-7134
URL: https://issues.apache.org/jira/browse/HUDI-7134
Project: Apache Hudi
Issue Type: Bug
Components: spark
Affects Versions: 0.14.0
Environment: hudi 0.14 spark 3.2.1
Reporter: loukey_j
{code:java}
1、CREATE TABLE if not exists hudi_ut_schema_evolution
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING
HUDI
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true',
type='cow', primaryKey='id')
2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version,
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate,
'2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN
NOT MATCHED THEN INSERT *
3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);
4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as
birthDate, '2023-10-02' as inc_day) s on t.id=s.id when matched THEN UPDATE
SET * WHEN NOT MATCHED THEN INSERT *
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;
6、merge into hudi_ut_schema_evolution t using ( select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name,
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as
inc_day) s on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN
INSERT *;
7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                                                        |add1|id |version|name |birthDate          |inc_day   |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030  |20231122164141030_0_0|1                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413  |20231122165045413_0_1|2                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036  |20231122165413036_0_2|3                 |inc_day=2023-10-02    |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
`_hoodie_commit_time` STRING COMMENT '',
`_hoodie_commit_seqno` STRING COMMENT '',
`_hoodie_record_key` STRING COMMENT '',
`_hoodie_partition_path` STRING COMMENT '',
`_hoodie_file_name` STRING COMMENT '',
{color:red}`add1` STRING,
`id` INT,{color}
`version` INT,
`name` STRING,
`birthDate` TIMESTAMP,
`inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
'hoodie.query.as.ro.table' = 'false',
'last_commit_completion_time_sync' = '20231122171640801',
'last_commit_time_sync' = '20231122171627218',
'primaryKey' = 'id',
'type' = 'cow')
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)