rangareddy commented on issue #12011:
URL: https://github.com/apache/hudi/issues/12011#issuecomment-3461280230

   Hi Team,
   
   When testing with the parameter `hoodie.datasource.hive_sync.skip_ro.suffix` 
set to true, Hudi unexpectedly still created the read-optimized (RO) suffix 
table.
   
   ```sh
   spark-shell \
   --jars $HOME/hudi_jars/hudi-spark3.5-bundle_2.12-1.0.2.jar \
   --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
   --conf 
'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog'
 \
   --conf 
'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
   --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar' \
   --conf "spark.sql.catalogImplementation=hive" \
   --conf "spark.hadoop.hive.metastore.uris=thrift://localhost:9083"
   ```
   
   ```scala
   val databaseName = "default"
   val tableName = "my_trips_table_mor_skip_ro_suffix_true"
   val basePath = f"file:///tmp/hudi/$tableName"
   
   spark.sql(f"show tables like '$tableName'").show(false)
   
   val columns = Seq("ts","uuid","rider","driver","fare","city")
   
   val data =
     
Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
       
(1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70
 ,"san_francisco"),
       
(1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90
 ,"san_francisco"),
       
(1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"
    ),
       
(1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
   
   var inserts = spark.createDataFrame(data).toDF(columns:_*)
   
   inserts.write.format("hudi").
     option("hoodie.table.name", tableName).
     option("hoodie.database.name", databaseName).
     option("hoodie.datasource.write.table.name", tableName).
     option("hoodie.datasource.write.recordkey.field", "uuid").
     option("hoodie.datasource.write.precombine.field", "ts").
     option("hoodie.datasource.write.partitionpath.field", "city").
     option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
     option("hoodie.datasource.hive_sync.enable", "true").
     option("hoodie.datasource.hive_sync.mode", "hms").
     option("hoodie.datasource.hive_sync.database", databaseName).
     option("hoodie.datasource.hive_sync.table", tableName).
     option("hoodie.datasource.hive_sync.partition_fields", "city").
     option("hoodie.datasource.hive_sync.skip_ro.suffix", "true").
     mode("overwrite").
     save(basePath)
   
   scala> spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   +---------+-----------------------------------------+-----------+
   |namespace|tableName                                |isTemporary|
   +---------+-----------------------------------------+-----------+
   |default  |my_trips_table_mor_skip_ro_suffix_true   |false      |
   |default  |my_trips_table_mor_skip_ro_suffix_true_ro|false      |
   |default  |my_trips_table_mor_skip_ro_suffix_true_rt|false      |
   +---------+-----------------------------------------+-----------+
   
   val databaseName = "default"
   val tableName = "my_trips_table_mor_skip_ro_suffix_false"
   val basePath = f"file:///tmp/hudi/$tableName"
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
   val columns = Seq("ts","uuid","rider","driver","fare","city")
   
   val data =
     
Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
       
(1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70
 ,"san_francisco"),
       
(1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90
 ,"san_francisco"),
       
(1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"
    ),
       
(1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
   
   var inserts = spark.createDataFrame(data).toDF(columns:_*)
   
   inserts.write.format("hudi").
     option("hoodie.table.name", tableName).
     option("hoodie.database.name", databaseName).
     option("hoodie.datasource.write.table.name", tableName).
     option("hoodie.datasource.write.recordkey.field", "uuid").
     option("hoodie.datasource.write.precombine.field", "ts").
     option("hoodie.datasource.write.partitionpath.field", "city").
     option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
     option("hoodie.datasource.hive_sync.enable", "true").
     option("hoodie.datasource.hive_sync.mode", "hms").
     option("hoodie.datasource.hive_sync.database", databaseName).
     option("hoodie.datasource.hive_sync.table", tableName).
     option("hoodie.datasource.hive_sync.partition_fields", "city").
     option("hoodie.datasource.hive_sync.skip_ro.suffix", "false").
     mode("overwrite").
     save(basePath)
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
    spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   +---------+------------------------------------------+-----------+
   |namespace|tableName                                 |isTemporary|
   +---------+------------------------------------------+-----------+
   |default  |my_trips_table_mor_skip_ro_suffix_false   |false      |
   |default  |my_trips_table_mor_skip_ro_suffix_false_ro|false      |
   |default  |my_trips_table_mor_skip_ro_suffix_false_rt|false      |
   +---------+------------------------------------------+-----------+
   
   scala> spark.sql("show create table 
my_trips_table_mor_skip_ro_suffix_false_rt").show(100, false)
   
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |createtab_stmt                                                              
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                              |
   
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |CREATE TABLE 
spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false_rt (\n  
_hoodie_commit_time STRING,\n  _hoodie_commit_seqno STRING,\n  
_hoodie_record_key STRING,\n  _hoodie_partition_path STRING,\n  
_hoodie_file_name STRING,\n  ts BIGINT NOT NULL,\n  uuid STRING,\n  rider 
STRING,\n  driver STRING,\n  fare DOUBLE NOT NULL,\n  city STRING)\nUSING 
hudi\nOPTIONS (\n  'hoodie.query.as.ro.table' = 'false')\nPARTITIONED BY 
(city)\nLOCATION 
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n  
'last_commit_completion_time_sync' = '20251029174433737',\n  
'last_commit_time_sync' = '20251029174429189')\n|
   
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   
   
   scala> spark.sql("show create table 
my_trips_table_mor_skip_ro_suffix_false_ro").show(100, false)
   
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |createtab_stmt                                                              
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                             |
   
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |CREATE TABLE 
spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false_ro (\n  
_hoodie_commit_time STRING,\n  _hoodie_commit_seqno STRING,\n  
_hoodie_record_key STRING,\n  _hoodie_partition_path STRING,\n  
_hoodie_file_name STRING,\n  ts BIGINT NOT NULL,\n  uuid STRING,\n  rider 
STRING,\n  driver STRING,\n  fare DOUBLE NOT NULL,\n  city STRING)\nUSING 
hudi\nOPTIONS (\n  'hoodie.query.as.ro.table' = 'true')\nPARTITIONED BY 
(city)\nLOCATION 
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n  
'last_commit_completion_time_sync' = '20251029174433737',\n  
'last_commit_time_sync' = '20251029174429189')\n|
   
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   
   
   scala> spark.sql("show create table 
my_trips_table_mor_skip_ro_suffix_false").show(100, false)
   
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |createtab_stmt                                                              
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                                
                                                                           |
   
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   |CREATE TABLE spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false 
(\n  _hoodie_commit_time STRING,\n  _hoodie_commit_seqno STRING,\n  
_hoodie_record_key STRING,\n  _hoodie_partition_path STRING,\n  
_hoodie_file_name STRING,\n  ts BIGINT NOT NULL,\n  uuid STRING,\n  rider 
STRING,\n  driver STRING,\n  fare DOUBLE NOT NULL,\n  city STRING)\nUSING 
hudi\nOPTIONS (\n  'hoodie.query.as.ro.table' = 'false')\nPARTITIONED BY 
(city)\nLOCATION 
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n  
'last_commit_completion_time_sync' = '20251029174433737',\n  
'last_commit_time_sync' = '20251029174429189')\n|
   
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to