parisni opened a new issue, #8893:
URL: https://github.com/apache/hudi/issues/8893

   0.13.1 does not honor `hoodie.datasource.write.hive_style_partitioning` if it 
is not specified explicitly during a delete operation. In previous versions it 
was likely read from `hoodie.properties`.  As a result, deletion can fail silently.
   
   ```scala
   import org.apache.spark.sql.SaveMode
   import org.apache.spark.sql.functions._
   import org.apache.hudi.DataSourceWriteOptions
   import org.apache.hudi.config.HoodieWriteConfig
   import org.apache.hudi.hive.MultiPartKeysValueExtractor
   
   val path="/tmp/tbl"
   val dt = spark.sql("""
     select cast(1 as bigint) as list_id, 1 as _hudi_last_update, '123' as 
_hudi_partition
   """)
   var hudiOptions = Map[String, String](
     HoodieWriteConfig.TABLE_NAME -> "tbl",
     DataSourceWriteOptions.OPERATION_OPT_KEY -> "insert",
     DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY -> "COPY_ON_WRITE", // COW rewrite the file
     DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "list_id",
     DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "_hudi_partition",
     DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "_hudi_last_update",
     DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY -> "false",
     "hoodie.index.type" -> "SIMPLE",
     "hoodie.metadata.enable" -> "true",
     "hoodie.datasource.write.keygenerator.class" -> 
"org.apache.hudi.keygen.ComplexKeyGenerator",
     "hoodie.datasource.write.hive_style_partitioning" -> "true"
   )
   
   
dt.write.format("org.apache.hudi").options(hudiOptions).mode(SaveMode.Overwrite).save(path)
   spark.read.format("hudi").load(path).show(false)
   
   hudiOptions = Map[String, String](
     HoodieWriteConfig.TABLE_NAME -> "tbl",
     DataSourceWriteOptions.OPERATION_OPT_KEY -> "delete",
     DataSourceWriteOptions.STORAGE_TYPE_OPT_KEY -> "COPY_ON_WRITE", // COW rewrite the file
     DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY -> "list_id",
     DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY -> "_hudi_partition",
     DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY -> "_hudi_last_update",
     DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY -> "false",
     "hoodie.index.type" -> "SIMPLE",
     "hoodie.metadata.enable" -> "true",
     "hoodie.datasource.write.keygenerator.class" -> 
"org.apache.hudi.keygen.ComplexKeyGenerator",
   )
   
   dt.dropDuplicates("list_id").select("list_id", 
"_hudi_partition").write.format("org.apache.hudi").options(hudiOptions).mode(SaveMode.Append).save(path)
   spark.read.format("hudi").load(path).show(false)
   ```
   
   ## In `0.13.1` deletion is NOT applied:
   ```
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   |_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                    
                                     |list_id|_hudi_last_update|_hudi_partition|
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   |20230606112459697  |20230606112459697_0_0|list_id:1         
|_hudi_partition=123   
|010e5fcf-2848-49a5-88a2-d62dc9442f06-0_0-189-165_20230606112459697.parquet|1   
   |1                |123            |
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   
   
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   |_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                    
                                     |list_id|_hudi_last_update|_hudi_partition|
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   |20230606112459697  |20230606112459697_0_0|list_id:1         
|_hudi_partition=123   
|010e5fcf-2848-49a5-88a2-d62dc9442f06-0_0-189-165_20230606112459697.parquet|1   
   |1                |123            |
   
+-------------------+---------------------+------------------+----------------------+--------------------------------------------------------------------------+-------+-----------------+---------------+
   ```
   
   ## In `0.12.3` deletion is applied:
   ```
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+-------+-----------------+---------------+
   |_hoodie_commit_time|_hoodie_commit_seqno 
|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name                    
                                   |list_id|_hudi_last_update|_hudi_partition|
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+-------+-----------------+---------------+
   |20230606112748141  |20230606112748141_0_0|list_id:1         
|_hudi_partition=123   
|b9d5fc08-7fdd-4ba9-a675-ea8eaf402257-0_0-17-15_20230606112748141.parquet|1     
 |1                |123            |
   
+-------------------+---------------------+------------------+----------------------+------------------------------------------------------------------------+-------+-----------------+---------------+
   
   23/06/06 11:27:55 WARN DataSourceOptionsHelper$: 
hoodie.datasource.write.storage.type is deprecated and will be removed in a 
later release; Please use hoodie.datasource.write.table.type
   
+-------------------+--------------------+------------------+----------------------+-----------------+-------+-----------------+---------------+
   
|_hoodie_commit_time|_hoodie_commit_seqno|_hoodie_record_key|_hoodie_partition_path|_hoodie_file_name|list_id|_hudi_last_update|_hudi_partition|
   
+-------------------+--------------------+------------------+----------------------+-----------------+-------+-----------------+---------------+
   
+-------------------+--------------------+------------------+----------------------+-----------------+-------+-----------------+---------------+
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to