[ 
https://issues.apache.org/jira/browse/HUDI-7943?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated HUDI-7943:
---------------------------------
    Labels: pull-request-available  (was: )

> Resolve version conflict of fasterxml on spark3.2 
> --------------------------------------------------
>
>                 Key: HUDI-7943
>                 URL: https://issues.apache.org/jira/browse/HUDI-7943
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: dependencies
>         Environment: hudi0.14.1, Spark3.2
>            Reporter: Jihwan Lee
>            Priority: Major
>              Labels: pull-request-available
>
> When running a streaming read on Spark 3.2, an exception is raised stating that 
> the correct version of Jackson Databind is required.
> Spark versions other than 3.2 appear to use Jackson versions aligned with Spark's own dependencies.
>  
> version refer: https://github.com/apache/spark/blob/v3.2.3/pom.xml#L170
>  
> example code:
>  
> {code:java}
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.common.table.HoodieTableConfig._
> import org.apache.hudi.config.HoodieWriteConfig._
> import org.apache.hudi.keygen.constant.KeyGeneratorOptions._
> import org.apache.hudi.common.model.HoodieRecord
> import spark.implicits._
> val basePath = "hdfs:///tmp/trips_table"
> spark.readStream
> .format("hudi")
> .option("hoodie.datasource.query.type", "incremental")
> .option("hoodie.datasource.query.incremental.format", "cdc")
> .load(basePath)
> .writeStream
> .format("console")
> .option("checkpointLocation", "/tmp/trips_table_checkpoint")
> .outputMode("append")
> .start().awaitTermination()
> {code}
>  
>  
> error log:
>  
> {code:java}
> Caused by: java.lang.ExceptionInInitializerError: 
> com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.10.0 
> requires Jackson Databind version >= 2.10.0 and < 2.11.0
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset.<init>(HoodieSourceOffset.scala:30)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieStreamSource.getLatestOffset(HoodieStreamSource.scala:127)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieStreamSource.getOffset(HoodieStreamSource.scala:138)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$6(MicroBatchExecution.scala:403)
>   at 
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
>   at 
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
>   at 
> org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$2(MicroBatchExecution.scala:402)
>   at 
> scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
>   at scala.collection.Iterator.foreach(Iterator.scala:943)
>   at scala.collection.Iterator.foreach$(Iterator.scala:943)
>   at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
>   at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>   at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>   at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>   at scala.collection.TraversableLike.map(TraversableLike.scala:286)
>   at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
>   at scala.collection.AbstractTraversable.map(Traversable.scala:108)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$1(MicroBatchExecution.scala:384)
>   at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.withProgressLocked(MicroBatchExecution.scala:627)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.constructNextBatch(MicroBatchExecution.scala:380)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:210)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at 
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
>   at 
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
>   at 
> org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
>   at 
> org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
>   at 
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
>   at 
> org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
>   at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
>   at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>   at 
> org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
>   ... 1 more
> Caused by: com.fasterxml.jackson.databind.JsonMappingException: Scala module 
> 2.10.0 requires Jackson Databind version >= 2.10.0 and < 2.11.0
>   at 
> org.apache.hudi.com.fasterxml.jackson.module.scala.JacksonModule.setupModule(JacksonModule.scala:61)
>   at 
> org.apache.hudi.com.fasterxml.jackson.module.scala.JacksonModule.setupModule$(JacksonModule.scala:46)
>   at 
> org.apache.hudi.com.fasterxml.jackson.module.scala.DefaultScalaModule.setupModule(DefaultScalaModule.scala:17)
>   at 
> com.fasterxml.jackson.databind.ObjectMapper.registerModule(ObjectMapper.java:835)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.mapper$lzycompute(HoodieSourceOffset.scala:53)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.mapper(HoodieSourceOffset.scala:49)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.toJson(HoodieSourceOffset.scala:58)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset.<init>(HoodieSourceOffset.scala:30)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.<init>(HoodieSourceOffset.scala:72)
>   at 
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.<clinit>(HoodieSourceOffset.scala)
>   ... 35 more {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to