rangareddy commented on issue #12011:
URL: https://github.com/apache/hudi/issues/12011#issuecomment-3461348502

   **Hudi 0.14:**
   
   ```sh
   spark-shell \
   --jars $HOME/hudi_jars/hudi-spark3.4-bundle_2.12-0.14.0.jar \
   --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
   --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \
   --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
   --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar' \
   --conf "spark.sql.catalogImplementation=hive" \
   --conf "spark.hadoop.hive.metastore.uris=thrift://localhost:9083"
   ```
   
   ```scala
   val databaseName = "default"
   val tableName = "my_trips_table_014_mor_skip_ro_suffix_true"
   val basePath = f"file:///tmp/hudi/$tableName"
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
   val columns = Seq("ts","uuid","rider","driver","fare","city")
   
   val data =
     Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
       (1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70,"san_francisco"),
       (1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90,"san_francisco"),
       (1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"),
       (1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
   
   var inserts = spark.createDataFrame(data).toDF(columns:_*)
   
   inserts.write.format("hudi").
     option("hoodie.table.name", tableName).
     option("hoodie.database.name", databaseName).
     option("hoodie.datasource.write.table.name", tableName).
     option("hoodie.datasource.write.recordkey.field", "uuid").
     option("hoodie.datasource.write.precombine.field", "ts").
     option("hoodie.datasource.write.partitionpath.field", "city").
     option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
     option("hoodie.datasource.hive_sync.enable", "true").
     option("hoodie.datasource.hive_sync.mode", "hms").
     option("hoodie.datasource.hive_sync.database", databaseName).
     option("hoodie.datasource.hive_sync.table", tableName).
     option("hoodie.datasource.hive_sync.partition_fields", "city").
     option("hoodie.datasource.hive_sync.skip_ro_suffix", "true").
     mode("overwrite").
     save(basePath)
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
   scala> spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   +---------+---------------------------------------------+-----------+
   |namespace|tableName                                    |isTemporary|
   +---------+---------------------------------------------+-----------+
   |default  |my_trips_table_014_mor_skip_ro_suffix_true_ro|false      |
   |default  |my_trips_table_014_mor_skip_ro_suffix_true_rt|false      |
   +---------+---------------------------------------------+-----------+
   
   val databaseName = "default"
   val tableName = "my_trips_table_014_mor_skip_ro_suffix_false"
   val basePath = f"file:///tmp/hudi/$tableName"
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
   val columns = Seq("ts","uuid","rider","driver","fare","city")
   
   val data =
     Seq((1695159649087L,"334e26e9-8355-45cc-97c6-c31daf0df330","rider-A","driver-K",19.10,"san_francisco"),
       (1695091554788L,"e96c4396-3fad-413a-a942-4cb36106d721","rider-C","driver-M",27.70,"san_francisco"),
       (1695046462179L,"9909a8b1-2d15-4d3d-8ec9-efc48c536a00","rider-D","driver-L",33.90,"san_francisco"),
       (1695516137016L,"e3cf430c-889d-4015-bc98-59bdce1e530c","rider-F","driver-P",34.15,"sao_paulo"),
       (1695115999911L,"c8abbe79-8d89-47ea-b4ce-4d224bae5bfa","rider-J","driver-T",17.85,"chennai"));
   
   var inserts = spark.createDataFrame(data).toDF(columns:_*)
   
   inserts.write.format("hudi").
     option("hoodie.table.name", tableName).
     option("hoodie.database.name", databaseName).
     option("hoodie.datasource.write.table.name", tableName).
     option("hoodie.datasource.write.recordkey.field", "uuid").
     option("hoodie.datasource.write.precombine.field", "ts").
     option("hoodie.datasource.write.partitionpath.field", "city").
     option("hoodie.datasource.write.table.type", "MERGE_ON_READ").
     option("hoodie.datasource.hive_sync.enable", "true").
     option("hoodie.datasource.hive_sync.mode", "hms").
     option("hoodie.datasource.hive_sync.database", databaseName).
     option("hoodie.datasource.hive_sync.table", tableName).
     option("hoodie.datasource.hive_sync.partition_fields", "city").
     option("hoodie.datasource.hive_sync.skip_ro_suffix", "false").
     mode("overwrite").
     save(basePath)
   
   spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   
   scala> spark.sql(s"SHOW TABLES LIKE '$tableName*'").show(false)
   +---------+----------------------------------------------+-----------+
   |namespace|tableName                                     |isTemporary|
   +---------+----------------------------------------------+-----------+
   |default  |my_trips_table_014_mor_skip_ro_suffix_false_ro|false      |
   |default  |my_trips_table_014_mor_skip_ro_suffix_false_rt|false      |
   +---------+----------------------------------------------+-----------+
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to