[ 
https://issues.apache.org/jira/browse/HUDI-7134?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

loukey_j updated HUDI-7134:
---------------------------
    Description: 
You can reproduce the problem by following the steps below. The value of add1 
in step 7 is not as expected.

1、CREATE TABLE if not exists hudi_ut_schema_evolution 
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING 
HUDI 
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true', 
type='cow', primaryKey='id')

2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version, 
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
'2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN 
NOT MATCHED THEN INSERT *

3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);

4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2 
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as 
birthDate, '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE 
SET * WHEN NOT MATCHED THEN INSERT *
 
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;

6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name, 
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as 
inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN 
INSERT *;

7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                    
                                    |add1|id |version|name |birthDate          
|inc_day   |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030  |20231122164141030_0_0|1                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413  |20231122165045413_0_1|2                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036  |20231122165413036_0_2|3                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+

8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
  `_hoodie_commit_time` STRING COMMENT '',
  `_hoodie_commit_seqno` STRING COMMENT '',
  `_hoodie_record_key` STRING COMMENT '',
  `_hoodie_partition_path` STRING COMMENT '',
  `_hoodie_file_name` STRING COMMENT '',
  {color:red}`add1` STRING,
  `id` INT,{color}
  `version` INT,
  `name` STRING,
  `birthDate` TIMESTAMP,
  `inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
  'hoodie.query.as.ro.table' = 'false',
  'last_commit_completion_time_sync' = '20231122171640801',
  'last_commit_time_sync' = '20231122171627218',
  'primaryKey' = 'id',
  'type' = 'cow')

  was:
{code:java}
1、CREATE TABLE if not exists hudi_ut_schema_evolution 
(id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING 
HUDI 
PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true', 
type='cow', primaryKey='id')

2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version, 
'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
'2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN 
NOT MATCHED THEN INSERT *

3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);

4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 2 
as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as 
birthDate, '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE 
SET * WHEN NOT MATCHED THEN INSERT *
 
5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;

6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as name, 
cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' as 
inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED THEN 
INSERT *;

7、select * from hudi_ut_schema_evolution;
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                    
                                    |add1|id |version|name |birthDate          
|inc_day   |
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
|20231122164141030  |20231122164141030_0_0|1                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165045413  |20231122165045413_0_1|2                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
|20231122165413036  |20231122165413036_0_2|3                 
|inc_day=2023-10-02    
|9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
  |2      |str_1|2023-01-01 12:12:12|2023-10-02|
+-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+

8、show create table hudi_ut_schema_evolution;
CREATE TABLE unisql.hudi_ut_schema_evolution (
  `_hoodie_commit_time` STRING COMMENT '',
  `_hoodie_commit_seqno` STRING COMMENT '',
  `_hoodie_record_key` STRING COMMENT '',
  `_hoodie_partition_path` STRING COMMENT '',
  `_hoodie_file_name` STRING COMMENT '',
  {color:red}`add1` STRING,
  `id` INT,{color}
  `version` INT,
  `name` STRING,
  `birthDate` TIMESTAMP,
  `inc_day` STRING)
PARTITIONED BY (inc_day)
TBLPROPERTIES(
  'hoodie.query.as.ro.table' = 'false',
  'last_commit_completion_time_sync' = '20231122171640801',
  'last_commit_time_sync' = '20231122171627218',
  'primaryKey' = 'id',
  'type' = 'cow')
{code}



> After deleting the field and re-executing the merge, the result is not as 
> expected.
> -----------------------------------------------------------------------------------
>
>                 Key: HUDI-7134
>                 URL: https://issues.apache.org/jira/browse/HUDI-7134
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark
>    Affects Versions: 0.14.0
>         Environment: hudi 0.14 spark 3.2.1
>            Reporter: loukey_j
>            Priority: Major
>              Labels: schema-evolution
>
> You can reproduce the problem by following the steps below. The value of add1 
> in step 7 is not as expected.
> 1、CREATE TABLE if not exists hudi_ut_schema_evolution 
> (id INT, version INT, name STRING, birthDate TIMESTAMP, inc_day STRING) USING 
> HUDI 
> PARTITIONED BY (inc_day) TBLPROPERTIES (delta.enableChangeDataFeed='true', 
> type='cow', primaryKey='id')
> 2、merge into hudi_ut_schema_evolution t using ( select 1 as id, 2 as version, 
> 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, 
> '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN 
> NOT MATCHED THEN INSERT *
> 3、ALTER TABLE hudi_ut_schema_evolution ADD COLUMNS (add1 String AFTER id);
> 4、merge into hudi_ut_schema_evolution t using ( select '1' as add1, 2 as id, 
> 2 as version, 'str_1' as name, cast('2023-01-01 12:12:12.0' as timestamp) as 
> birthDate, '2023-10-02' as inc_day) s  on t.id=s.id when matched THEN UPDATE 
> SET * WHEN NOT MATCHED THEN INSERT *
>  
> 5、ALTER TABLE hudi_ut_schema_evolution DROP COLUMN add1;
> 6、select {color:red}'1' as add1{color}, 3 as id, 2 as version, 'str_1' as 
> name, cast('2023-01-01 12:12:12.0' as timestamp) as birthDate, '2023-10-02' 
> as inc_day) s  on t.id=s.id when matched THEN UPDATE SET * WHEN NOT MATCHED 
> THEN INSERT *;
> 7、select * from hudi_ut_schema_evolution;
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> |_hoodie_commit_time|_hoodie_commit_seqno 
> |_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                  
>                                       |add1|id |version|name |birthDate       
>    |inc_day   |
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> |20231122164141030  |20231122164141030_0_0|1                 
> |inc_day=2023-10-02    
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|1
>   |2      |str_1|2023-01-01 12:12:12|2023-10-02|
> |20231122165045413  |20231122165045413_0_1|2                 
> |inc_day=2023-10-02    
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|null|2
>   |2      |str_1|2023-01-01 12:12:12|2023-10-02|
> |20231122165413036  |20231122165413036_0_2|3                 
> |inc_day=2023-10-02    
> |9fa5823c-7e29-4330-9b05-dd72e6088d62-0_0-112-98_20231122165413036.parquet|{color:red}null{color}|3
>   |2      |str_1|2023-01-01 12:12:12|2023-10-02|
> +-------------------+---------------------+------------------+----------------------+-------------------------------------------------------------------------+----+---+-------+-----+-------------------+----------+
> 8、show create table hudi_ut_schema_evolution;
> CREATE TABLE unisql.hudi_ut_schema_evolution (
>   `_hoodie_commit_time` STRING COMMENT '',
>   `_hoodie_commit_seqno` STRING COMMENT '',
>   `_hoodie_record_key` STRING COMMENT '',
>   `_hoodie_partition_path` STRING COMMENT '',
>   `_hoodie_file_name` STRING COMMENT '',
>   {color:red}`add1` STRING,
>   `id` INT,{color}
>   `version` INT,
>   `name` STRING,
>   `birthDate` TIMESTAMP,
>   `inc_day` STRING)
> PARTITIONED BY (inc_day)
> TBLPROPERTIES(
>   'hoodie.query.as.ro.table' = 'false',
>   'last_commit_completion_time_sync' = '20231122171640801',
>   'last_commit_time_sync' = '20231122171627218',
>   'primaryKey' = 'id',
>   'type' = 'cow')



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to