rangareddy commented on issue #11335:
URL: https://github.com/apache/hudi/issues/11335#issuecomment-2328394614

   Hi @KSubramanyaH 
   
   I am unable to reproduce this issue using the following code. I have used
Spark 3.3.2, Hudi 0.14.1 and Java 8.
   
   ```
   import org.apache.spark.sql.functions.col
   import org.apache.spark.sql.{Row, SparkSession}
   import org.apache.spark.sql.types._
   
   object SparkHudiTest {

     /** Reproduction attempt for the reported schema-evolution issue:
       * bulk_insert a small DataFrame into a Hudi table, upsert one updated
       * row, and read the table back.
       *
       * Uses an explicit main() instead of the App trait — App's delayed
       * initialization can interact badly with Spark's closure serialization.
       */
     def main(args: Array[String]): Unit = {
       val spark = SparkSession.builder.appName("test")
         .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
         // FIX: key previously had a stray "conf " prefix
         // ("conf spark.kryoserializer.buffer.max"), which Spark silently ignored.
         .config("spark.kryoserializer.buffer.max", "2040M")
         .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
         .config("spark.hadoop.spark.sql.legacy.parquet.nanosAsLong", "false")
         .config("spark.hadoop.spark.sql.parquet.binaryAsString", "false")
         .config("spark.hadoop.spark.sql.parquet.int96AsTimestamp", "true")
         .config("spark.hadoop.spark.sql.caseSensitive", "false")
         .config("spark.sql.parquet.datetimeRebaseModeInWrite", "CORRECTED")
         .config("spark.sql.parquet.datetimeRebaseModeInRead", "CORRECTED")
         .config("spark.sql.parquet.int96RebaseModeInWrite", "CORRECTED")
         .config("spark.sql.storeAssignmentPolicy", "legacy")
         .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'")
         .config("spark.driver.extraJavaOptions", "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'")
         .config("spark.sql.hive.convertMetastoreParquet", "false")
         .config("spark.master", "local[2]")
         .getOrCreate()

       val tableName = "test_table"
       val basePath  = "file:///tmp/test_table"

       // Rows: (id, name, age, salary, ts)
       val inputData = Seq(
         Row(1L, "hello", 42, BigDecimal(123331.15), 1695159649087L),
         Row(2L, "world", 13, BigDecimal(223331.72), 1695091554788L),
         Row(3L, "spark", 7, BigDecimal(323331.60), 1695115999911L)
       )

       val inputSchema = StructType(Seq(
         StructField("id", LongType),
         StructField("name", StringType),
         StructField("age", IntegerType),
         StructField("salary", DecimalType(13, 4)),
         StructField("ts", LongType)
       ))

       val inputDf = spark.createDataFrame(spark.sparkContext.parallelize(inputData), inputSchema)
       inputDf.show(truncate = false)

       // Initial load via bulk_insert, overwriting any previous run.
       inputDf.write.format("hudi")
         .option("hoodie.datasource.write.recordkey.field", "id")
         // FIX: use the documented DataFrame-writer option; the original used
         // the table-level key "hoodie.table.precombine.field", which the
         // datasource writer does not read.
         .option("hoodie.datasource.write.precombine.field", "name")
         .option("hoodie.datasource.write.operation", "bulk_insert")
         .option("hoodie.table.name", tableName)
         .mode("overwrite")
         .save(basePath)

       val outputDf = spark.read.format("hudi").load(basePath)
       outputDf.show(truncate = false)

       // Bump the age of the "spark" row; schema is unchanged, so the upsert
       // below should succeed without evolution conflicts.
       val updatesDf = outputDf.where("name='spark'").withColumn("age", col("age") + 10)
         //.withColumn("salary", col("salary") * 10)
       updatesDf.printSchema()
       outputDf.printSchema()

       updatesDf.write.format("hudi")
         .option("hoodie.datasource.write.operation", "upsert")
         .option("hoodie.table.name", tableName)
         .mode("append")
         .save(basePath)

       val output2Df = spark.read.format("hudi").load(basePath)
       output2Df.show(truncate = false)

       spark.close()
     }
   }
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to