rangareddy commented on issue #12068:
URL: https://github.com/apache/hudi/issues/12068#issuecomment-2404182429
I have tested the following sample code and it worked without any issues.

**Cluster Details:**

* Spark 3.5
* Hudi 1.0.0-beta2

```sh
spark-shell \
  --jars packaging/hudi-spark-bundle/target/hudi-spark3.5-bundle_2.12-1.0.0-beta2.jar \
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer' \
  --conf 'spark.sql.catalog.spark_catalog=org.apache.spark.sql.hudi.catalog.HoodieCatalog' \
  --conf 'spark.sql.extensions=org.apache.spark.sql.hudi.HoodieSparkSessionExtension' \
  --conf 'spark.kryo.registrator=org.apache.spark.HoodieSparkKryoRegistrar' \
  --conf spark.ui.port=14040
```

```scala
import scala.collection.JavaConversions._
import org.apache.spark.sql.SaveMode._
import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.common.table.HoodieTableConfig._
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.config.HoodieWriteConfig._
import org.apache.hudi.keygen.constant.KeyGeneratorOptions._
import org.apache.hudi.common.model.HoodieRecord
import org.apache.hudi.common.table.HoodieTableConfig
import org.apache.hudi.config.HoodieIndexConfig
import org.apache.hudi.common.config.HoodieStorageConfig
import org.apache.hudi.common.config.HoodieMetadataConfig
import org.apache.hudi.execution.bulkinsert.BulkInsertSortMode
import spark.implicits._

// Sample trips data
val columns = Seq("ts", "uuid", "rider", "driver", "fare", "city")
val data = Seq(
  (1695159649087L, "334e26e9-8355-45cc-97c6-c31daf0df330", "rider-A", "driver-K", 19.10, "san_francisco"),
  (1695091554788L, "e96c4396-3fad-413a-a942-4cb36106d721", "rider-C", "driver-M", 27.70, "san_francisco"),
  (1695046462179L, "9909a8b1-2d15-4d3d-8ec9-efc48c536a00", "rider-D", "driver-L", 33.90, "san_francisco"),
  (1695516137016L, "e3cf430c-889d-4015-bc98-59bdce1e530c", "rider-F", "driver-P", 34.15, "sao_paulo"),
  (1695115999911L, "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa", "rider-J", "driver-T", 17.85, "chennai")
)
val inserts = spark.createDataFrame(data).toDF(columns: _*)

val tableName = "trips_table"
val basePath = "file:///tmp/trips_table"

// Bulk insert into a COW table with inline clustering, the record-level
// index, and the column-stats index enabled
val bulkWriteOptions: Map[String, String] = Map(
  DataSourceWriteOptions.OPERATION.key() -> DataSourceWriteOptions.BULK_INSERT_OPERATION_OPT_VAL,
  DataSourceWriteOptions.TABLE_TYPE.key() -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
  HoodieStorageConfig.PARQUET_COMPRESSION_CODEC_NAME.key() -> "snappy",
  HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key() -> "2147483648",
  "hoodie.parquet.small.file.limit" -> "1073741824",
  HoodieTableConfig.POPULATE_META_FIELDS.key() -> "false",
  HoodieWriteConfig.BULK_INSERT_SORT_MODE.key() -> BulkInsertSortMode.GLOBAL_SORT.name(),
  HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key() -> "true",
  HoodieIndexConfig.INDEX_TYPE.key() -> "RECORD_INDEX",
  DataSourceWriteOptions.META_SYNC_ENABLED.key() -> "false",
  "hoodie.metadata.record.index.enable" -> "true",
  "hoodie.metadata.enable" -> "true",
  "hoodie.datasource.write.hive_style_partitioning" -> "true",
  "hoodie.clustering.inline" -> "true",
  "hoodie.clustering.plan.strategy.target.file.max.bytes" -> "2147483648",
  "hoodie.clustering.plan.strategy.small.file.limit" -> "1073741824",
  "hoodie.datasource.write.partitionpath.field" -> "city",
  "hoodie.datasource.write.recordkey.field" -> "uuid",
  "hoodie.datasource.write.precombine.field" -> "ts",
  "hoodie.table.name" -> tableName
)

inserts.write.format("hudi").
  options(bulkWriteOptions).
  mode(Overwrite).
  save(basePath)

// Read the table back and verify the rows
val tripsDF = spark.read.format("hudi").load(basePath)
tripsDF.show(false)
```
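
As a quick sanity check that the indexes are usable on the read path, a point lookup on the record key can be run with data skipping enabled. This is only a sketch: treating `hoodie.enable.data.skipping` as a read option in this release is an assumption, and the filter still works as a plain column predicate even if the indexes are not consulted.

```scala
import org.apache.spark.sql.functions.col

// Point lookup on the record key; with the metadata table and data skipping
// enabled, the record-level and column-stats indexes built above can help
// prune files (assumption: hoodie.enable.data.skipping applies on the read
// path in 1.0.0-beta2).
val lookupDF = spark.read.format("hudi").
  option("hoodie.metadata.enable", "true").
  option("hoodie.enable.data.skipping", "true").
  load(basePath).
  where(col("uuid") === "334e26e9-8355-45cc-97c6-c31daf0df330")
lookupDF.show(false)
```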
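
To confirm whether inline clustering actually ran, the table timeline can be inspected for a replacecommit. A minimal sketch, assuming the local base path used above; note that by default inline clustering only triggers after `hoodie.clustering.inline.max.commits` commits, so a single bulk insert may not produce one.

```scala
import java.nio.file.{Files, Paths}
import scala.collection.JavaConverters._

// Walk the .hoodie directory and print any replacecommit instants
// (assumption: the timeline lives under the local base path; the exact
// layout can differ between Hudi releases).
Files.walk(Paths.get("/tmp/trips_table/.hoodie")).iterator().asScala.
  filter(_.getFileName.toString.contains("replacecommit")).
  foreach(p => println(p.getFileName))
```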