[ 
https://issues.apache.org/jira/browse/HUDI-8821?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Leon Lin updated HUDI-8821:
---------------------------
    Description: 
Reproduction Steps:

 
{code:java}
// 1. Create an empty table with Hudi 0.14 + Spark 3.5.0 

spark.sql(
"""
|CREATE TABLE lliangyu_table_mor (
| event_id INT,
| event_date STRING,
| event_name STRING,
| event_ts STRING,
| event_type STRING
|) USING hudi
| OPTIONS(
| type = 'mor',
| primaryKey = 'event_id,event_date',
| preCombileField = 'event_ts',
| hoodie.write.table.version = 6,
| hoodie.compact.inline = 'true',
| hoodie.compact.inline.max.delta.commits = 2
|)
|PARTITIONED BY (event_type)
|LOCATION 
's3://lliangyu-580974493829-us-west-2/warehouse/hudi/lliangyu_table_mor';
""".stripMargin){code}
{code:java}
// 2. Insert some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
spark.sql("set hoodie.write.table.version=6")
spark.sql("set hoodie.compact.inline='true'")
spark.sql("set hoodie.compact.inline.max.delta.commits=2")

val insertStatements = Seq(
"INSERT INTO lliangyu_table_mor VALUES (100, '2015-01-01', 'event_name_900', 
'2015-01-01T13:51:39.340396Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (101, '2015-01-01', 'event_name_546', 
'2015-01-01T12:14:58.597216Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (102, '2015-01-01', 'event_name_345', 
'2015-01-01T13:51:40.417052Z', 'type3');",
"INSERT INTO lliangyu_table_mor VALUES (103, '2015-01-01', 'event_name_234', 
'2015-01-01T13:51:40.519832Z', 'type4');",
"INSERT INTO lliangyu_table_mor VALUES (104, '2015-01-01', 'event_name_123', 
'2015-01-01T12:15:00.512679Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (105, '2015-01-01', 'event_name_678', 
'2015-01-01T13:51:42.248818Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (106, '2015-01-01', 'event_name_890', 
'2015-01-01T13:51:44.735360Z', 'type3');",
"INSERT INTO lliangyu_table_mor VALUES (107, '2015-01-01', 'event_name_944', 
'2015-01-01T13:51:45.019544Z', 'type4');",
"INSERT INTO lliangyu_table_mor VALUES (108, '2015-01-01', 'event_name_456', 
'2015-01-01T13:51:45.208007Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (109, '2015-01-01', 'event_name_567', 
'2015-01-01T13:51:45.369689Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (110, '2015-01-01', 'event_name_789', 
'2015-01-01T12:15:05.664947Z', 'type3');"
)

insertStatements.foreach { query => spark.sql(query) }

// DUE TO an issue with https://issues.apache.org/jira/browse/HUDI-8820
// You will find that some rows inserted using the backward writer do not appear in
// the selection.{code}
 
{code:java}
// 3. Delete some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
// Run deletes on rows that could be retrieved from selection
spark.sql("DELETE FROM default.lliangyu_table_mor WHERE event_type = 
'type2'").show(false);

// Running the select again returns incorrect results.
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key 
|_hoodie_partition_path|_hoodie_file_name |event_id|event_date|event_name 
|event_ts |event_type| 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |20250103202501108 
|20250103202501108_0_0|event_id:108,event_date:2015-01-01|event_type=type1 
|b935d179-56b3-4f81-81e4-8bb0cf97c873-0_0-131-4218_20250103202501108.parquet|108
 |2015-01-01|event_name_456|2015-01-01T13:51:45.208007Z|type1 | 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
{code}
 

 

 

 

  was:
Reproduction Steps:

 
{code:java}
// 1. Create an empty table with Hudi 0.14 + Spark 3.5.0 spark.sql(
"""
|CREATE TABLE lliangyu_table_mor (
| event_id INT,
| event_date STRING,
| event_name STRING,
| event_ts STRING,
| event_type STRING
|) USING hudi
| OPTIONS(
| type = 'mor',
| primaryKey = 'event_id,event_date',
| preCombileField = 'event_ts',
| hoodie.write.table.version = 6,
| hoodie.compact.inline = 'true',
| hoodie.compact.inline.max.delta.commits = 2
|)
|PARTITIONED BY (event_type)
|LOCATION 
's3://lliangyu-580974493829-us-west-2/warehouse/hudi/lliangyu_table_mor';
""".stripMargin){code}
{code:java}
// 2. Insert some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
spark.sql("set hoodie.write.table.version=6")
spark.sql("set hoodie.compact.inline='true'")
spark.sql("set hoodie.compact.inline.max.delta.commits=2")

val insertStatements = Seq(
"INSERT INTO lliangyu_table_mor VALUES (100, '2015-01-01', 'event_name_900', 
'2015-01-01T13:51:39.340396Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (101, '2015-01-01', 'event_name_546', 
'2015-01-01T12:14:58.597216Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (102, '2015-01-01', 'event_name_345', 
'2015-01-01T13:51:40.417052Z', 'type3');",
"INSERT INTO lliangyu_table_mor VALUES (103, '2015-01-01', 'event_name_234', 
'2015-01-01T13:51:40.519832Z', 'type4');",
"INSERT INTO lliangyu_table_mor VALUES (104, '2015-01-01', 'event_name_123', 
'2015-01-01T12:15:00.512679Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (105, '2015-01-01', 'event_name_678', 
'2015-01-01T13:51:42.248818Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (106, '2015-01-01', 'event_name_890', 
'2015-01-01T13:51:44.735360Z', 'type3');",
"INSERT INTO lliangyu_table_mor VALUES (107, '2015-01-01', 'event_name_944', 
'2015-01-01T13:51:45.019544Z', 'type4');",
"INSERT INTO lliangyu_table_mor VALUES (108, '2015-01-01', 'event_name_456', 
'2015-01-01T13:51:45.208007Z', 'type1');",
"INSERT INTO lliangyu_table_mor VALUES (109, '2015-01-01', 'event_name_567', 
'2015-01-01T13:51:45.369689Z', 'type2');",
"INSERT INTO lliangyu_table_mor VALUES (110, '2015-01-01', 'event_name_789', 
'2015-01-01T12:15:05.664947Z', 'type3');"
)

insertStatements.foreach { query => spark.sql(query) }

// DUE TO an issue with https://issues.apache.org/jira/browse/HUDI-8820
// You will find that some rows inserted using the backward writer do not appear in
// the selection.{code}
 
{code:java}
// 3. Delete some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
// Run deletes on rows that could be retrieved from selection
spark.sql("DELETE FROM default.lliangyu_table_mor WHERE event_type = 
'type2'").show(false);

// Running the select again returns incorrect results.
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key 
|_hoodie_partition_path|_hoodie_file_name |event_id|event_date|event_name 
|event_ts |event_type| 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |20250103202501108 
|20250103202501108_0_0|event_id:108,event_date:2015-01-01|event_type=type1 
|b935d179-56b3-4f81-81e4-8bb0cf97c873-0_0-131-4218_20250103202501108.parquet|108
 |2015-01-01|event_name_456|2015-01-01T13:51:45.208007Z|type1 | 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
{code}
 

 

 

 


> Hudi 1.0 Spark SQL unexpected delete behaviors with backward writer
> -------------------------------------------------------------------
>
>                 Key: HUDI-8821
>                 URL: https://issues.apache.org/jira/browse/HUDI-8821
>             Project: Apache Hudi
>          Issue Type: Sub-task
>            Reporter: Leon Lin
>            Priority: Blocker
>             Fix For: 1.0.1
>
>
> Reproduction Steps:
>  
> {code:java}
> // 1. Create an empty table with Hudi 0.14 + Spark 3.5.0 
> spark.sql(
> """
> |CREATE TABLE lliangyu_table_mor (
> | event_id INT,
> | event_date STRING,
> | event_name STRING,
> | event_ts STRING,
> | event_type STRING
> |) USING hudi
> | OPTIONS(
> | type = 'mor',
> | primaryKey = 'event_id,event_date',
> | preCombileField = 'event_ts',
> | hoodie.write.table.version = 6,
> | hoodie.compact.inline = 'true',
> | hoodie.compact.inline.max.delta.commits = 2
> |)
> |PARTITIONED BY (event_type)
> |LOCATION 
> 's3://lliangyu-580974493829-us-west-2/warehouse/hudi/lliangyu_table_mor';
> """.stripMargin){code}
> {code:java}
> // 2. Insert some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
> spark.sql("set hoodie.write.table.version=6")
> spark.sql("set hoodie.compact.inline='true'")
> spark.sql("set hoodie.compact.inline.max.delta.commits=2")
> val insertStatements = Seq(
> "INSERT INTO lliangyu_table_mor VALUES (100, '2015-01-01', 'event_name_900', 
> '2015-01-01T13:51:39.340396Z', 'type1');",
> "INSERT INTO lliangyu_table_mor VALUES (101, '2015-01-01', 'event_name_546', 
> '2015-01-01T12:14:58.597216Z', 'type2');",
> "INSERT INTO lliangyu_table_mor VALUES (102, '2015-01-01', 'event_name_345', 
> '2015-01-01T13:51:40.417052Z', 'type3');",
> "INSERT INTO lliangyu_table_mor VALUES (103, '2015-01-01', 'event_name_234', 
> '2015-01-01T13:51:40.519832Z', 'type4');",
> "INSERT INTO lliangyu_table_mor VALUES (104, '2015-01-01', 'event_name_123', 
> '2015-01-01T12:15:00.512679Z', 'type1');",
> "INSERT INTO lliangyu_table_mor VALUES (105, '2015-01-01', 'event_name_678', 
> '2015-01-01T13:51:42.248818Z', 'type2');",
> "INSERT INTO lliangyu_table_mor VALUES (106, '2015-01-01', 'event_name_890', 
> '2015-01-01T13:51:44.735360Z', 'type3');",
> "INSERT INTO lliangyu_table_mor VALUES (107, '2015-01-01', 'event_name_944', 
> '2015-01-01T13:51:45.019544Z', 'type4');",
> "INSERT INTO lliangyu_table_mor VALUES (108, '2015-01-01', 'event_name_456', 
> '2015-01-01T13:51:45.208007Z', 'type1');",
> "INSERT INTO lliangyu_table_mor VALUES (109, '2015-01-01', 'event_name_567', 
> '2015-01-01T13:51:45.369689Z', 'type2');",
> "INSERT INTO lliangyu_table_mor VALUES (110, '2015-01-01', 'event_name_789', 
> '2015-01-01T12:15:05.664947Z', 'type3');"
> )
> insertStatements.foreach { query => spark.sql(query) }
> // DUE TO an issue with https://issues.apache.org/jira/browse/HUDI-8820
> // You will find that some rows inserted using the backward writer do not appear in
> // the selection.{code}
>  
> {code:java}
> // 3. Delete some rows using Spark 3.5.3 / Hudi 1.0 Backward writer
> // Run deletes on rows that could be retrieved from selection
> spark.sql("DELETE FROM default.lliangyu_table_mor WHERE event_type = 
> 'type2'").show(false);
> // Running the select again returns incorrect results.
> +-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
>  |_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key 
> |_hoodie_partition_path|_hoodie_file_name |event_id|event_date|event_name 
> |event_ts |event_type| 
> +-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
>  |20250103202501108 
> |20250103202501108_0_0|event_id:108,event_date:2015-01-01|event_type=type1 
> |b935d179-56b3-4f81-81e4-8bb0cf97c873-0_0-131-4218_20250103202501108.parquet|108
>  |2015-01-01|event_name_456|2015-01-01T13:51:45.208007Z|type1 | 
> +-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
> {code}
>  
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to