Hi,
I am getting this error when trying to ingest the second batch of data (
upsets) into COW dataset. Looks like the KryoInstantiator is missing in the
jars. Is this something that needs to be added to classpath separately ?
2019-04-02 21:36:23 ERROR HoodieCopyOnWriteTable:274 - Error upserting
bucketType UPDATE for partition :0
java.lang.NoClassDefFoundError:
com/uber/hoodie/com/twitter/chill/KryoInstantiator
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
...
at
com.uber.hoodie.common.util.SerializationUtils.serialize(SerializationUtils.java:50)
at
com.uber.hoodie.common.util.collection.DiskBasedMap.put(DiskBasedMap.java:169)
at
com.uber.hoodie.common.util.collection.ExternalSpillableMap.put(ExternalSpillableMap.java:169)
at
com.uber.hoodie.common.util.collection.ExternalSpillableMap.put(ExternalSpillableMap.java:42)
at com.uber.hoodie.io.HoodieMergeHandle.init(HoodieMergeHandle.java:159)
at com.uber.hoodie.io.HoodieMergeHandle.<init>(HoodieMergeHandle.java:73)
at
com.uber.hoodie.table.HoodieCopyOnWriteTable.getUpdateHandle(HoodieCopyOnWriteTable.java:230)
at
com.uber.hoodie.table.HoodieCopyOnWriteTable.handleUpdate(HoodieCopyOnWriteTable.java:184)
at
com.uber.hoodie.table.HoodieCopyOnWriteTable.handleUpsertPartition(HoodieCopyOnWriteTable.java:267)
at
com.uber.hoodie.HoodieWriteClient.lambda$upsertRecordsInternal$7ef77fd$1(HoodieWriteClient.java:440)
at
org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102)
at
org.apache.spark.api.java.JavaRDDLike$$anonfun$mapPartitionsWithIndex$1.apply(JavaRDDLike.scala:102)
at
org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$26.apply(RDD.scala:847)
at
org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$26.apply(RDD.scala:847)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:337)
at org.apache.spark.rdd.RDD$$anonfun$7.apply(RDD.scala:335)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1109)
at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:109)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException:
com.uber.hoodie.com.twitter.chill.KryoInstantiator
at java.net.URLClassLoader.findClass(URLClassLoader.java:382)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
Thanks,
Sudha