rangareddy commented on issue #11335:
URL: https://github.com/apache/hudi/issues/11335#issuecomment-2328394614

   Hi @KSubramanyaH 
   
   I am unable to reproduce this issue using the following code. I have used
Spark 3.3.2, Hudi 0.14.1 and Java 8.
   
   ```
   import org.apache.spark.sql.functions.col
   import org.apache.spark.sql.{Row, SparkSession}
   import org.apache.spark.sql.types._
   
   object SparkHudiTest {

     /** Reproduction attempt for the reported schema-evolution issue:
       * bulk_insert a small DataFrame into a Hudi table, upsert one updated
       * row, and read the table back.
       *
       * Uses an explicit main() instead of the App trait — App's delayed
       * initialization can interact badly with Spark's closure serialization.
       */
     def main(args: Array[String]): Unit = {
       val spark = SparkSession.builder.appName("test")
         .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
         // FIX: key previously had a stray "conf " prefix
         // ("conf spark.kryoserializer.buffer.max"), which Spark silently ignored.
         .config("spark.kryoserializer.buffer.max", "2040M")
         .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
         .config("spark.hadoop.spark.sql.legacy.parquet.nanosAsLong", "false")
         .config("spark.hadoop.spark.sql.parquet.binaryAsString", "false")
         .config("spark.hadoop.spark.sql.parquet.int96AsTimestamp", "true")
         .config("spark.hadoop.spark.sql.caseSensitive", "false")
         .config("spark.sql.parquet.datetimeRebaseModeInWrite", "CORRECTED")
         .config("spark.sql.parquet.datetimeRebaseModeInRead", "CORRECTED")
         .config("spark.sql.parquet.int96RebaseModeInWrite", "CORRECTED")
         .config("spark.sql.storeAssignmentPolicy", "legacy")
         .config("spark.executor.extraJavaOptions", "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'")
         .config("spark.driver.extraJavaOptions", "-XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+G1SummarizeConcMark -XX:InitiatingHeapOccupancyPercent=35 -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:OnOutOfMemoryError='kill -9 %p'")
         .config("spark.sql.hive.convertMetastoreParquet", "false")
         .config("spark.master", "local[2]")
         .getOrCreate()

       val tableName = "test_table"
       val basePath  = "file:///tmp/test_table"

       // Rows: (id, name, age, salary, ts)
       val inputData = Seq(
         Row(1L, "hello", 42, BigDecimal(123331.15), 1695159649087L),
         Row(2L, "world", 13, BigDecimal(223331.72), 1695091554788L),
         Row(3L, "spark", 7, BigDecimal(323331.60), 1695115999911L)
       )

       val inputSchema = StructType(Seq(
         StructField("id", LongType),
         StructField("name", StringType),
         StructField("age", IntegerType),
         StructField("salary", DecimalType(13, 4)),
         StructField("ts", LongType)
       ))

       val inputDf = spark.createDataFrame(spark.sparkContext.parallelize(inputData), inputSchema)
       inputDf.show(truncate = false)

       // Initial load via bulk_insert, overwriting any previous run.
       inputDf.write.format("hudi")
         .option("hoodie.datasource.write.recordkey.field", "id")
         // FIX: use the documented DataFrame-writer option; the original used
         // the table-level key "hoodie.table.precombine.field", which the
         // datasource writer does not read.
         .option("hoodie.datasource.write.precombine.field", "name")
         .option("hoodie.datasource.write.operation", "bulk_insert")
         .option("hoodie.table.name", tableName)
         .mode("overwrite")
         .save(basePath)

       val outputDf = spark.read.format("hudi").load(basePath)
       outputDf.show(truncate = false)

       // Bump the age of the "spark" row; schema is unchanged, so the upsert
       // below should succeed without evolution conflicts.
       val updatesDf = outputDf.where("name='spark'").withColumn("age", col("age") + 10)
         //.withColumn("salary", col("salary") * 10)
       updatesDf.printSchema()
       outputDf.printSchema()

       updatesDf.write.format("hudi")
         .option("hoodie.datasource.write.operation", "upsert")
         .option("hoodie.table.name", tableName)
         .mode("append")
         .save(basePath)

       val output2Df = spark.read.format("hudi").load(basePath)
       output2Df.show(truncate = false)

       spark.close()
     }
   }
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to