rangareddy commented on issue #2509:
URL: https://github.com/apache/hudi/issues/2509#issuecomment-3737107671
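
   Reproducing this with a small example: the same one-row DataFrame is written to two Hudi tables, once with `hoodie.datasource.hive_sync.support_timestamp` set to `false` and once set to `true`, so the synced schemas can be compared. First, launch spark-shell with the Hudi bundle: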

   
   ```sh
   spark-shell \
        --jars /usr/lib/hudi/hudi-spark-bundle.jar \
        --conf spark.serializer=org.apache.spark.serializer.KryoSerializer
   ```
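
   Hudi requires `spark.serializer=org.apache.spark.serializer.KryoSerializer` for Spark writes. Then, in the shell, build a one-row DataFrame with a timestamp column derived from a string: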
   
   ```scala
   import org.apache.spark.sql.functions._
   import org.apache.spark.sql.types._

   // Build a one-row DataFrame and derive a proper timestamp column from the string.
   val seq = Seq((1, "2020-01-01 11:22:30", 2, 2))
   val df = seq.toDF("pk", "time_string", "partition", "sort_key")
     .withColumn("ts", col("time_string").cast(TimestampType))
   
   val tableName = "test_ts_table"
   val outputPath = f"s3://one-house-db-uat/data/$tableName"
   
   val hudiOptions = Map[String, String](
     "hoodie.table.name" -> tableName,
     "hoodie.consistency.check.enabled" -> "true",
     "hoodie.datasource.write.table.type" -> "COPY_ON_WRITE",
     "hoodie.datasource.write.recordkey.field" -> "pk",
     "hoodie.datasource.write.partitionpath.field" -> "partition",
     "hoodie.datasource.write.precombine.field" -> "sort_key",
     "hoodie.datasource.hive_sync.enable" -> "true",
     "hoodie.datasource.hive_sync.mode" -> "hms",
     "hoodie.datasource.hive_sync.metastore.uris" -> "thrift://localhost:9083",
     "hoodie.datasource.hive_sync.database" -> "default",
     "hoodie.datasource.hive_sync.table" -> tableName,
     "hoodie.datasource.hive_sync.partition_fields" -> "partition",
     "hoodie.datasource.write.hive_style_partitioning" -> "true",
     "hoodie.datasource.hive_sync.partition_extractor_class" -> "org.apache.hudi.hive.MultiPartKeysValueExtractor",
     // Legacy default: sync timestamp columns to the Hive metastore as BIGINT.
     "hoodie.datasource.hive_sync.support_timestamp" -> "false"
   )

   df.write.format("hudi").options(hudiOptions).mode("overwrite").save(outputPath)

   spark.sql(f"select pk, time_string, sort_key, ts, partition from $tableName").show()
   
   +---+-------------------+--------+-------------------+---------+
   | pk|        time_string|sort_key|                 ts|partition|
   +---+-------------------+--------+-------------------+---------+
   |  1|2020-01-01 11:22:30|       2|2020-01-01 11:22:30|        2|
   +---+-------------------+--------+-------------------+---------+
   
   spark.sql(f"describe  $tableName").show()
   
   +--------------------+---------+-------+
   |            col_name|data_type|comment|
   +--------------------+---------+-------+
   | _hoodie_commit_time|   string|   NULL|
   |_hoodie_commit_seqno|   string|   NULL|
   |  _hoodie_record_key|   string|   NULL|
   |_hoodie_partition...|   string|   NULL|
   |   _hoodie_file_name|   string|   NULL|
   |                  pk|      int|   NULL|
   |         time_string|   string|   NULL|
   |            sort_key|      int|   NULL|
   |                  ts|timestamp|   NULL|
   |           partition|      int|   NULL|
   |# Partition Infor...|         |       |
   |          # col_name|data_type|comment|
   |           partition|      int|   NULL|
   +--------------------+---------+-------+
   ```
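
   With `support_timestamp=false`, Spark still reports `ts` as `timestamp` because it resolves the schema from the Hudi table itself rather than from the metastore; the flag only affects the column type registered in Hive. Now the same write again with `support_timestamp=true`, reusing `df` from the session above: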
   
   
   ```scala
   val tableName = "test_ts_table2"
   val outputPath = f"s3://one-house-db-uat/data/$tableName"
   
   val hudiOptions = Map[String, String](
     "hoodie.table.name" -> tableName,
     "hoodie.consistency.check.enabled" -> "true",
     "hoodie.datasource.write.table.type" -> "COPY_ON_WRITE",
     "hoodie.datasource.write.recordkey.field" -> "pk",
     "hoodie.datasource.write.partitionpath.field" -> "partition",
     "hoodie.datasource.write.precombine.field" -> "sort_key",
     "hoodie.datasource.hive_sync.enable" -> "true",
     "hoodie.datasource.hive_sync.mode" -> "hms",
     "hoodie.datasource.hive_sync.metastore.uris" -> "thrift://localhost:9083",
     "hoodie.datasource.hive_sync.database" -> "default",
     "hoodie.datasource.hive_sync.table" -> tableName,
     "hoodie.datasource.hive_sync.partition_fields" -> "partition",
     "hoodie.datasource.write.hive_style_partitioning" -> "true",
     "hoodie.datasource.hive_sync.partition_extractor_class" -> "org.apache.hudi.hive.MultiPartKeysValueExtractor",
     // With support_timestamp enabled, sync timestamp columns to Hive as TIMESTAMP.
     "hoodie.datasource.hive_sync.support_timestamp" -> "true"
   )

   df.write.format("hudi").options(hudiOptions).mode("overwrite").save(outputPath)

   spark.sql(f"select pk, time_string, sort_key, ts, partition from $tableName").show()
   
   +---+-------------------+--------+-------------------+---------+
   | pk|        time_string|sort_key|                 ts|partition|
   +---+-------------------+--------+-------------------+---------+
   |  1|2020-01-01 11:22:30|       2|2020-01-01 11:22:30|        2|
   +---+-------------------+--------+-------------------+---------+
   
   spark.sql(f"describe  $tableName").show()
   +--------------------+---------+-------+
   |            col_name|data_type|comment|
   +--------------------+---------+-------+
   | _hoodie_commit_time|   string|   NULL|
   |_hoodie_commit_seqno|   string|   NULL|
   |  _hoodie_record_key|   string|   NULL|
   |_hoodie_partition...|   string|   NULL|
   |   _hoodie_file_name|   string|   NULL|
   |                  pk|      int|   NULL|
   |         time_string|   string|   NULL|
   |            sort_key|      int|   NULL|
   |                  ts|timestamp|   NULL|
   |           partition|      int|   NULL|
   |# Partition Infor...|         |       |
   |          # col_name|data_type|comment|
   |           partition|      int|   NULL|
   +--------------------+---------+-------+
   ```
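
   The difference should be visible on the Hive side: with `support_timestamp=false` the metastore registers `ts` as `bigint`, while with `true` it is registered as `timestamp`. Spark's `describe` shows `timestamp` in both cases, so check from beeline instead (the connect string below is a placeholder, adjust for your HiveServer2):

   ```sh
   # Compare the ts column type the Hive metastore recorded for each table.
   # Expected: bigint for test_ts_table, timestamp for test_ts_table2.
   beeline -u jdbc:hive2://localhost:10000 -e "DESCRIBE default.test_ts_table"
   beeline -u jdbc:hive2://localhost:10000 -e "DESCRIBE default.test_ts_table2"
   ```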

