zengyangjie commented on issue #9974:
URL: https://github.com/apache/hudi/issues/9974#issuecomment-1790200356
All ERROR messages are:
```shell
ERROR TorrentBroadcast: Store broadcast broadcast_0 fail, remove all pieces
of the broadcast
org.apache.spark.SparkException: Job aborted due to stage failure: Task
serialization failed: org.apache.spark.SparkException: Failed to register
classes with Kryo
org.apache.spark.SparkException: Failed to register classes with Kryo
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$5(KryoSerializer.scala:183)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:233)
at
org.apache.spark.serializer.KryoSerializer.newKryo(KryoSerializer.scala:171)
at
org.apache.spark.serializer.KryoSerializer$$anon$1.create(KryoSerializer.scala:102)
at
com.esotericsoftware.kryo.pool.KryoPoolQueueImpl.borrow(KryoPoolQueueImpl.java:48)
at
org.apache.spark.serializer.KryoSerializer$PoolWrapper.borrow(KryoSerializer.scala:109)
at
org.apache.spark.serializer.KryoSerializerInstance.borrowKryo(KryoSerializer.scala:346)
at
org.apache.spark.serializer.KryoSerializationStream.<init>(KryoSerializer.scala:266)
at
org.apache.spark.serializer.KryoSerializerInstance.serializeStream(KryoSerializer.scala:432)
at
org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:319)
at
org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:140)
at
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:95)
at
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
at
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:75)
at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1529)
at
org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1513)
at
org.apache.spark.scheduler.DAGScheduler.submitStage(DAGScheduler.scala:1329)
at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:1271)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2810)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
Caused by: java.lang.ClassNotFoundException:
org.apache.spark.HoodieSparkKryoRegistrar
at
org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:124)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:220)
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$7(KryoSerializer.scala:178)
at
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$5(KryoSerializer.scala:178)
... 22 more
Caused by: java.lang.ClassNotFoundException:
org.apache.spark.HoodieSparkKryoRegistrar
at java.lang.ClassLoader.findClass(ClassLoader.java:523)
at
org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.java:35)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at
org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.java:40)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at
org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:109)
... 36 more
at
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
at
org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1523)
at
org.apache.spark.scheduler.DAGScheduler.submitStage(DAGScheduler.scala:1329)
at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:1271)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2810)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1021)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1020)
at org.apache.spark.api.java.JavaRDDLike.collect(JavaRDDLike.scala:362)
at org.apache.spark.api.java.JavaRDDLike.collect$(JavaRDDLike.scala:361)
at
org.apache.spark.api.java.AbstractJavaRDDLike.collect(JavaRDDLike.scala:45)
at
org.apache.hudi.client.common.HoodieSparkEngineContext.map(HoodieSparkEngineContext.java:103)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.listAllPartitions(HoodieBackedTableMetadataWriter.java:631)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initialCommit(HoodieBackedTableMetadataWriter.java:1064)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeFromFilesystem(HoodieBackedTableMetadataWriter.java:557)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.initializeIfNeeded(HoodieBackedTableMetadataWriter.java:390)
at
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.initialize(SparkHoodieBackedTableMetadataWriter.java:120)
at
org.apache.hudi.metadata.HoodieBackedTableMetadataWriter.<init>(HoodieBackedTableMetadataWriter.java:171)
at
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.<init>(SparkHoodieBackedTableMetadataWriter.java:89)
at
org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter.create(SparkHoodieBackedTableMetadataWriter.java:75)
at
org.apache.hudi.client.SparkRDDWriteClient.initializeMetadataTable(SparkRDDWriteClient.java:446)
at
org.apache.hudi.client.SparkRDDWriteClient.doInitTable(SparkRDDWriteClient.java:431)
at
org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1459)
at
org.apache.hudi.client.BaseHoodieWriteClient.initTable(BaseHoodieWriteClient.java:1491)
at
org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:152)
at
org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:206)
at
org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:329)
at org.apache.hudi.DefaultSource.createRelation(DefaultSource.scala:183)
at
org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at
org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98)
at
org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94)
at
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584)
at
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:176)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:584)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at
org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at
org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:560)
at
org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81)
at
org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79)
at
org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:116)
at
org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:860)
at
org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:390)
at
org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:363)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
... 53 elided
Caused by: org.apache.spark.SparkException: Failed to register classes with
Kryo
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$5(KryoSerializer.scala:183)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:233)
at
org.apache.spark.serializer.KryoSerializer.newKryo(KryoSerializer.scala:171)
at
org.apache.spark.serializer.KryoSerializer$$anon$1.create(KryoSerializer.scala:102)
at
com.esotericsoftware.kryo.pool.KryoPoolQueueImpl.borrow(KryoPoolQueueImpl.java:48)
at
org.apache.spark.serializer.KryoSerializer$PoolWrapper.borrow(KryoSerializer.scala:109)
at
org.apache.spark.serializer.KryoSerializerInstance.borrowKryo(KryoSerializer.scala:346)
at
org.apache.spark.serializer.KryoSerializationStream.<init>(KryoSerializer.scala:266)
at
org.apache.spark.serializer.KryoSerializerInstance.serializeStream(KryoSerializer.scala:432)
at
org.apache.spark.broadcast.TorrentBroadcast$.blockifyObject(TorrentBroadcast.scala:319)
at
org.apache.spark.broadcast.TorrentBroadcast.writeBlocks(TorrentBroadcast.scala:140)
at
org.apache.spark.broadcast.TorrentBroadcast.<init>(TorrentBroadcast.scala:95)
at
org.apache.spark.broadcast.TorrentBroadcastFactory.newBroadcast(TorrentBroadcastFactory.scala:34)
at
org.apache.spark.broadcast.BroadcastManager.newBroadcast(BroadcastManager.scala:75)
at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1529)
at
org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1513)
at
org.apache.spark.scheduler.DAGScheduler.submitStage(DAGScheduler.scala:1329)
at
org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:1271)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2810)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
Caused by: java.lang.ClassNotFoundException:
org.apache.spark.HoodieSparkKryoRegistrar
at
org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:124)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:348)
at org.apache.spark.util.Utils$.classForName(Utils.scala:220)
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$7(KryoSerializer.scala:178)
at
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at
org.apache.spark.serializer.KryoSerializer.$anonfun$newKryo$5(KryoSerializer.scala:178)
... 22 more
Caused by: java.lang.ClassNotFoundException:
org.apache.spark.HoodieSparkKryoRegistrar
at java.lang.ClassLoader.findClass(ClassLoader.java:523)
at
org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.java:35)
at java.lang.ClassLoader.loadClass(ClassLoader.java:418)
at
org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.java:40)
at java.lang.ClassLoader.loadClass(ClassLoader.java:351)
at
org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:109)
... 36 more
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]