rangareddy commented on issue #12011:
URL: https://github.com/apache/hudi/issues/12011#issuecomment-3461280230
Hi Team,
When testing with the parameter `hoodie.datasource.hive_sync.skip_ro.suffix`
set to true, Hudi unexpectedly still created the read-optimized (RO) suffixed
table. (Reviewer note: the documented option key is
`hoodie.datasource.hive_sync.skip_ro_suffix` — please confirm whether the
dotted variant `skip_ro.suffix` used in this reproduction is recognized at
all; if not, the option would be silently ignored, which would explain the
observed behavior for both the `true` and `false` runs below.)
```sh
spark-shell \
--jars $HOME/hudi_jars/hudi-spark3.5-bundle_2.12-1.0.2.jar \
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
--conf
'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog'
\
--conf
'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
--conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar' \
--conf "spark.sql.catalogImplementation=hive" \
--conf "spark.hadoop.hive.metastore.uris=thrift://localhost:9083"
```
```scala
val databaseName = "default"
val tableName = "my_trips_table_mor_skip_ro_suffix_true"
val basePath = f"file:///tmp/hudi/$tableName"
spark.sql(f"show tables like '$tableName'").show(false)
val columns = Seq("ts","uuid","rider","driver","fare","city")
val data =
Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
(1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70
,"san_francisco"),
(1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90
,"san_francisco"),
(1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"
),
(1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
var inserts = spark.createDataFrame(data).toDF(columns:_*)
inserts.write.format("hudi").
option("hoodie.table.name", tableName).
option("hoodie.database.name", databaseName).
option("hoodie.datasource.write.table.name", tableName).
option("hoodie.datasource.write.recordkey.field", "uuid").
option("hoodie.datasource.write.precombine.field", "ts").
option("hoodie.datasource.write.partitionpath.field", "city").
option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
option("hoodie.datasource.hive_sync.enable", "true").
option("hoodie.datasource.hive_sync.mode", "hms").
option("hoodie.datasource.hive_sync.database", databaseName).
option("hoodie.datasource.hive_sync.table", tableName).
option("hoodie.datasource.hive_sync.partition_fields", "city").
option("hoodie.datasource.hive_sync.skip_ro.suffix", "true").
mode("overwrite").
save(basePath)
scala> spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
+---------+-----------------------------------------+-----------+
|namespace|tableName |isTemporary|
+---------+-----------------------------------------+-----------+
|default |my_trips_table_mor_skip_ro_suffix_true |false |
|default |my_trips_table_mor_skip_ro_suffix_true_ro|false |
|default |my_trips_table_mor_skip_ro_suffix_true_rt|false |
+---------+-----------------------------------------+-----------+
val databaseName = "default"
val tableName = "my_trips_table_mor_skip_ro_suffix_false"
val basePath = f"file:///tmp/hudi/$tableName"
spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
val columns = Seq("ts","uuid","rider","driver","fare","city")
val data =
Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
(1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70
,"san_francisco"),
(1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90
,"san_francisco"),
(1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"
),
(1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
var inserts = spark.createDataFrame(data).toDF(columns:_*)
inserts.write.format("hudi").
option("hoodie.table.name", tableName).
option("hoodie.database.name", databaseName).
option("hoodie.datasource.write.table.name", tableName).
option("hoodie.datasource.write.recordkey.field", "uuid").
option("hoodie.datasource.write.precombine.field", "ts").
option("hoodie.datasource.write.partitionpath.field", "city").
option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
option("hoodie.datasource.hive_sync.enable", "true").
option("hoodie.datasource.hive_sync.mode", "hms").
option("hoodie.datasource.hive_sync.database", databaseName).
option("hoodie.datasource.hive_sync.table", tableName).
option("hoodie.datasource.hive_sync.partition_fields", "city").
option("hoodie.datasource.hive_sync.skip_ro.suffix", "false").
mode("overwrite").
save(basePath)
spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
+---------+------------------------------------------+-----------+
|namespace|tableName |isTemporary|
+---------+------------------------------------------+-----------+
|default |my_trips_table_mor_skip_ro_suffix_false |false |
|default |my_trips_table_mor_skip_ro_suffix_false_ro|false |
|default |my_trips_table_mor_skip_ro_suffix_false_rt|false |
+---------+------------------------------------------+-----------+
scala> spark.sql("show create table
my_trips_table_mor_skip_ro_suffix_false_rt").show(100, false)
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|createtab_stmt
|
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|CREATE TABLE
spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false_rt (\n
_hoodie_commit_time STRING,\n _hoodie_commit_seqno STRING,\n
_hoodie_record_key STRING,\n _hoodie_partition_path STRING,\n
_hoodie_file_name STRING,\n ts BIGINT NOT NULL,\n uuid STRING,\n rider
STRING,\n driver STRING,\n fare DOUBLE NOT NULL,\n city STRING)\nUSING
hudi\nOPTIONS (\n 'hoodie.query.as.ro.table' = 'false')\nPARTITIONED BY
(city)\nLOCATION
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n
'last_commit_completion_time_sync' = '20251029174433737',\n
'last_commit_time_sync' = '20251029174429189')\n|
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
scala> spark.sql("show create table
my_trips_table_mor_skip_ro_suffix_false_ro").show(100, false)
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|createtab_stmt
|
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|CREATE TABLE
spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false_ro (\n
_hoodie_commit_time STRING,\n _hoodie_commit_seqno STRING,\n
_hoodie_record_key STRING,\n _hoodie_partition_path STRING,\n
_hoodie_file_name STRING,\n ts BIGINT NOT NULL,\n uuid STRING,\n rider
STRING,\n driver STRING,\n fare DOUBLE NOT NULL,\n city STRING)\nUSING
hudi\nOPTIONS (\n 'hoodie.query.as.ro.table' = 'true')\nPARTITIONED BY
(city)\nLOCATION
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n
'last_commit_completion_time_sync' = '20251029174433737',\n
'last_commit_time_sync' = '20251029174429189')\n|
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
scala> spark.sql("show create table
my_trips_table_mor_skip_ro_suffix_false").show(100, false)
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|createtab_stmt
|
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|CREATE TABLE spark_catalog.default.my_trips_table_mor_skip_ro_suffix_false
(\n _hoodie_commit_time STRING,\n _hoodie_commit_seqno STRING,\n
_hoodie_record_key STRING,\n _hoodie_partition_path STRING,\n
_hoodie_file_name STRING,\n ts BIGINT NOT NULL,\n uuid STRING,\n rider
STRING,\n driver STRING,\n fare DOUBLE NOT NULL,\n city STRING)\nUSING
hudi\nOPTIONS (\n 'hoodie.query.as.ro.table' = 'false')\nPARTITIONED BY
(city)\nLOCATION
'file:/tmp/hudi/my_trips_table_mor_skip_ro_suffix_false'\nTBLPROPERTIES (\n
'last_commit_completion_time_sync' = '20251029174433737',\n
'last_commit_time_sync' = '20251029174429189')\n|
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]