Leon Lin created HUDI-8820:
------------------------------

             Summary: Hudi 1.0 Spark SQL failed to query all rows written by 
Backward Writer
                 Key: HUDI-8820
                 URL: https://issues.apache.org/jira/browse/HUDI-8820
             Project: Apache Hudi
          Issue Type: Bug
    Affects Versions: 1.0.0
            Reporter: Leon Lin


Hudi 1.0 fails to read all of the data in a table created by the backward writer.

Reading the same table using Hudi 0.14.0 returns the correct results.

*Reproduction steps:*
{code:java}
1. Create a table using Hudi 0.14.0 / Spark 3.5.0

spark-shell --jars /usr/lib/hudi/hudi-spark-bundle.jar \ --conf 
"spark.serializer=org.apache.spark.serializer.KryoSerializer" \ --conf 
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
 \ --conf 
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension" 

spark.sql(
"""
|CREATE TABLE lliangyu_table_mor (
| event_id INT,
| event_date STRING,
| event_name STRING,
| event_ts STRING,
| event_type STRING
|) USING hudi
| OPTIONS(
| type = 'mor',
| primaryKey = 'event_id,event_date',
| preCombileField = 'event_ts',
| hoodie.write.table.version = 6,
| hoodie.compact.inline = 'true',
| hoodie.compact.inline.max.delta.commits = 2
|)
|PARTITIONED BY (event_type)
|LOCATION 
's3://lliangyu-580974493829-us-west-2/warehouse/hudi/lliangyu_table_mor';
""".stripMargin){code}
{code:java}
 2. Insert records using Hudi 1.0 backward writer
spark-shell --jars /usr/lib/hudi/hudi-spark3-bundle_2.12-1.0.0.jar \ --conf 
"spark.serializer=org.apache.spark.serializer.KryoSerializer" \ --conf 
"spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog"
 \ --conf 
"spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension"


spark.sql("set hoodie.write.table.version=6")
spark.sql("set hoodie.compact.inline='true'")
spark.sql("set hoodie.compact.inline.max.delta.commits=2")


val insertStatements = Seq( "INSERT INTO lliangyu_table_mor VALUES (100, 
'2015-01-01', 'event_name_900', '2015-01-01T13:51:39.340396Z', 'type1');", 
"INSERT INTO lliangyu_table_mor VALUES (101, '2015-01-01', 'event_name_546', 
'2015-01-01T12:14:58.597216Z', 'type2');", "INSERT INTO lliangyu_table_mor 
VALUES (102, '2015-01-01', 'event_name_345', '2015-01-01T13:51:40.417052Z', 
'type3');", "INSERT INTO lliangyu_table_mor VALUES (103, '2015-01-01', 
'event_name_234', '2015-01-01T13:51:40.519832Z', 'type4');", "INSERT INTO 
lliangyu_table_mor VALUES (104, '2015-01-01', 'event_name_123', 
'2015-01-01T12:15:00.512679Z', 'type1');", "INSERT INTO lliangyu_table_mor 
VALUES (105, '2015-01-01', 'event_name_678', '2015-01-01T13:51:42.248818Z', 
'type2');", "INSERT INTO lliangyu_table_mor VALUES (106, '2015-01-01', 
'event_name_890', '2015-01-01T13:51:44.735360Z', 'type3');", "INSERT INTO 
lliangyu_table_mor VALUES (107, '2015-01-01', 'event_name_944', 
'2015-01-01T13:51:45.019544Z', 'type4');", "INSERT INTO lliangyu_table_mor 
VALUES (108, '2015-01-01', 'event_name_456', '2015-01-01T13:51:45.208007Z', 
'type1');", "INSERT INTO lliangyu_table_mor VALUES (109, '2015-01-01', 
'event_name_567', '2015-01-01T13:51:45.369689Z', 'type2');", "INSERT INTO 
lliangyu_table_mor VALUES (110, '2015-01-01', 'event_name_789', 
'2015-01-01T12:15:05.664947Z', 'type3');" )


insertStatements.foreach { query => spark.sql(query) }


spark.sql("SELECT * FROM default.lliangyu_table_mor").show(false);
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |_hoodie_commit_time|_hoodie_commit_seqno |_hoodie_record_key 
|_hoodie_partition_path|_hoodie_file_name |event_id|event_date|event_name 
|event_ts |event_type| 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
 |20250103202545187 
|20250103202545187_0_0|event_id:110,event_date:2015-01-01|event_type=type3 
|0c873425-55d1-42bd-886f-230726276f3d-0_0-166-4765_20250103202545187.parquet|110
 |2015-01-01|event_name_789|2015-01-01T12:15:05.664947Z|type3 | 
|20250103202501108 
|20250103202501108_0_0|event_id:108,event_date:2015-01-01|event_type=type1 
|b935d179-56b3-4f81-81e4-8bb0cf97c873-0_0-131-4218_20250103202501108.parquet|108
 |2015-01-01|event_name_456|2015-01-01T13:51:45.208007Z|type1 | 
+-------------------+---------------------+----------------------------------+----------------------+---------------------------------------------------------------------------+--------+----------+--------------+---------------------------+----------+
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to