[
https://issues.apache.org/jira/browse/HUDI-7943?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jihwan Lee updated HUDI-7943:
-----------------------------
Summary: Resolve version conflict of fasterxml on spark3.2 (was: Fix that
resolve version conflict of fasterxml on spark3.2 )
> Resolve version conflict of fasterxml on spark3.2
> --------------------------------------------------
>
> Key: HUDI-7943
> URL: https://issues.apache.org/jira/browse/HUDI-7943
> Project: Apache Hudi
> Issue Type: Bug
> Components: dependencies
> Environment: hudi0.14.1, Spark3.2
> Reporter: Jihwan Lee
> Priority: Major
>
> When running a streaming read on Spark 3.2, an exception is raised stating that
> a compatible version of Jackson Databind is required.
> Spark versions other than 3.2 appear to resolve a Jackson Databind version that is compatible with their Spark dependencies.
>
> version refer: https://github.com/apache/spark/blob/v3.2.3/pom.xml#L170
>
> example code:
>
> {code:java}
> import scala.collection.JavaConversions._
> import org.apache.spark.sql.SaveMode._
> import org.apache.hudi.DataSourceReadOptions._
> import org.apache.hudi.DataSourceWriteOptions._
> import org.apache.hudi.common.table.HoodieTableConfig._
> import org.apache.hudi.config.HoodieWriteConfig._
> import org.apache.hudi.keygen.constant.KeyGeneratorOptions._
> import org.apache.hudi.common.model.HoodieRecord
> import spark.implicits._
> val basePath = "hdfs:///tmp/trips_table"
> spark.readStream
> .format("hudi")
> .option("hoodie.datasource.query.type", "incremental")
> .option("hoodie.datasource.query.incremental.format", "cdc")
> .load(basePath)
> .writeStream
> .format("console")
> .option("checkpointLocation", "/tmp/trips_table_checkpoint")
> .outputMode("append")
> .start().awaitTermination()
> {code}
>
>
> error log:
>
> {code:java}
> Caused by: java.lang.ExceptionInInitializerError:
> com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.10.0
> requires Jackson Databind version >= 2.10.0 and < 2.11.0
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset.<init>(HoodieSourceOffset.scala:30)
> at
> org.apache.spark.sql.hudi.streaming.HoodieStreamSource.getLatestOffset(HoodieStreamSource.scala:127)
> at
> org.apache.spark.sql.hudi.streaming.HoodieStreamSource.getOffset(HoodieStreamSource.scala:138)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$6(MicroBatchExecution.scala:403)
> at
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
> at
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$2(MicroBatchExecution.scala:402)
> at
> scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
> at scala.collection.Iterator.foreach(Iterator.scala:943)
> at scala.collection.Iterator.foreach$(Iterator.scala:943)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
> at scala.collection.IterableLike.foreach(IterableLike.scala:74)
> at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
> at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
> at scala.collection.TraversableLike.map(TraversableLike.scala:286)
> at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
> at scala.collection.AbstractTraversable.map(Traversable.scala:108)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$constructNextBatch$1(MicroBatchExecution.scala:384)
> at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.withProgressLocked(MicroBatchExecution.scala:627)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.constructNextBatch(MicroBatchExecution.scala:380)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:210)
> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
> at
> org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
> at
> org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
> at
> org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
> at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
> at
> org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
> ... 1 more
> Caused by: com.fasterxml.jackson.databind.JsonMappingException: Scala module
> 2.10.0 requires Jackson Databind version >= 2.10.0 and < 2.11.0
> at
> org.apache.hudi.com.fasterxml.jackson.module.scala.JacksonModule.setupModule(JacksonModule.scala:61)
> at
> org.apache.hudi.com.fasterxml.jackson.module.scala.JacksonModule.setupModule$(JacksonModule.scala:46)
> at
> org.apache.hudi.com.fasterxml.jackson.module.scala.DefaultScalaModule.setupModule(DefaultScalaModule.scala:17)
> at
> com.fasterxml.jackson.databind.ObjectMapper.registerModule(ObjectMapper.java:835)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.mapper$lzycompute(HoodieSourceOffset.scala:53)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.mapper(HoodieSourceOffset.scala:49)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.toJson(HoodieSourceOffset.scala:58)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset.<init>(HoodieSourceOffset.scala:30)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.<init>(HoodieSourceOffset.scala:72)
> at
> org.apache.spark.sql.hudi.streaming.HoodieSourceOffset$.<clinit>(HoodieSourceOffset.scala)
> ... 35 more {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)