Re: Spark SQL UDT Kryo serialization, Unable to find class
>> >>> org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32) >>>at >>> >>> org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39) >>>at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) >>>at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) >>>at >>> >>> org.apache.spark.sql.execution.joins.HashedRelation$.apply(HashedRelation.scala:80) >>>at >>> >>> org.apache.spark.sql.execution.joins.ShuffledHashJoin$$anonfun$execute$1.apply(ShuffledHashJoin.scala:46) >>>at >>> >>> org.apache.spark.sql.execution.joins.ShuffledHashJoin$$anonfun$execute$1.apply(ShuffledHashJoin.scala:45) >>>at >>> >>> org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >>>at >>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >>>at >>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at >>> org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:228) >>>at >>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >>>at >>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >>>at >>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >>>at >>> 
>>> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) >>>at >>> >>> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) >>>at org.apache.spark.scheduler.Task.run(Task.scala:56) >>>at >>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) >>>at >>> >>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >>>at >>> >>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >>>at java.lang.Thread.run(Thread.java:745) >>> *Caused by: java.lang.ClassNotFoundException: >>> com.gis.io.GeometryWritable* >>>at java.net.URLClassLoader$1.run(URLClassLoader.java:366) >>>at java.net.URLClassLoader$1.run(URLClassLoader.java:355) >>>at java.security.AccessController.doPrivileged(Native Method) >>>at java.net.URLClassLoader.findClass(URLClassLoader.java:354) >>>at java.lang.ClassLoader.loadClass(ClassLoader.java:425) >>>at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) >>>at java.lang.ClassLoader.loadClass(ClassLoader.java:358) >>>at java.lang.Class.forName0(Native Method) >>>at java.lang.Class.forName(Class.java:270) >>>at >>> >>> com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136) >>>... 52 more >>> >>> Where /com.gis.io.GeometryWritable/ is my Custom class for which I'm >>> creating UDT, which is present in the APP jar. I've tried as discussed >>> here >>> <https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg> >>> also >>> by changing spark default serializer from kryo to Java. But it doesn't >>> help >>> me out. Any Suggestions ?? If I'm missing something? Following are my >>> classes: >>> >>> @SQLUserDefinedType(udt = classOf[GeometryUDT]) >>> class GeometryWritable(var _geometry: Geometry) extends Writable with >>> Serializable { >>>
Re: Spark SQL UDT Kryo serialization, Unable to find class
.scala:263) >at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) >at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263) >at org.apache.spark.rdd.RDD.iterator(RDD.scala:230) >at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68) >at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) >at org.apache.spark.scheduler.Task.run(Task.scala:56) >at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196) >at > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >at > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >at java.lang.Thread.run(Thread.java:745) > *Caused by: java.lang.ClassNotFoundException: > com.gis.io.GeometryWritable* >at java.net.URLClassLoader$1.run(URLClassLoader.java:366) >at java.net.URLClassLoader$1.run(URLClassLoader.java:355) >at java.security.AccessController.doPrivileged(Native Method) >at java.net.URLClassLoader.findClass(URLClassLoader.java:354) >at java.lang.ClassLoader.loadClass(ClassLoader.java:425) >at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) >at java.lang.ClassLoader.loadClass(ClassLoader.java:358) >at java.lang.Class.forName0(Native Method) >at java.lang.Class.forName(Class.java:270) >at > > com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136) >... 52 more > > Where /com.gis.io.GeometryWritable/ is my Custom class for which I'm > creating UDT, which is present in the APP jar. I've tried as discussed here > <https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg> > also > by changing spark default serializer from kryo to Java. But it doesn't help > me out. 
Any Suggestions ?? If I'm missing something? Following are my > classes: > > @SQLUserDefinedType(udt = classOf[GeometryUDT]) > class GeometryWritable(var _geometry: Geometry) extends Writable with > Serializable { > >def geometry = _geometry >def geometry_=(geometry: Geometry) = _geometry = geometry > >def this() = this(null) > >override def write(dataOutput: DataOutput) : Unit = {} >override def readFields(dataInput: DataInput) : Unit = {} >@throws(classOf[IOException]) >private def writeObject(stream: ObjectOutputStream): Unit = {} >@throws(classOf[IOException]) >private def readObject(stream: ObjectInputStream): Unit = {} > } > > class GeometryUDT extends UserDefinedType[GeometryWritable] with > Serializable { > > override def sqlType: DataType = ArrayType(ByteType) > override def serialize(obj: Any): Array[Byte] = {} > override def deserialize(datum: Any): GeometryWritable = {} > override def userClass: Class[GeometryWritable] = > classOf[GeometryWritable] > >} > > This is how I'm using it. > > val rdd = sc.textFile(args(0)).map( >line => { > val point = new Point > point.setY(line.split(" ")(0).toDouble) > point.setX(line.split(" ")(1).toDouble) >Row.fromSeq(Seq(new GeometryWritable(point))) > }) > val schema = StructType(Seq(StructField("Geometry",new GeometryUDT, > true))) > > val schemaRDD = sqlContext.applySchema(rdd, > schema).persist(StorageLevel.MEMORY_AND_DISK) > > > > -- > View this message in context: > http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-UDT-Kryo-serialization-Unable-to-find-class-tp22101.html > Sent from the Apache Spark User List mailing list archive at Nabble.com. > > - > To unsubscribe, e-mail: user-unsubscr...@spark.apache.org > For additional commands, e-mail: user-h...@spark.apache.org > >
Spark SQL UDT Kryo serialization, Unable to find class
:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) *Caused by: java.lang.ClassNotFoundException: com.gis.io.GeometryWritable* at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:425) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:358) at java.lang.Class.forName0(Native Method) at java.lang.Class.forName(Class.java:270) at com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136) ... 52 more Where /com.gis.io.GeometryWritable/ is my Custom class for which I'm creating UDT, which is present in the APP jar. I've tried as discussed here <https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg> also by changing spark default serializer from kryo to Java. But it doesn't help me out. Any Suggestions ?? If I'm missing something? 
Following are my classes: @SQLUserDefinedType(udt = classOf[GeometryUDT]) class GeometryWritable(var _geometry: Geometry) extends Writable with Serializable { def geometry = _geometry def geometry_=(geometry: Geometry) = _geometry = geometry def this() = this(null) override def write(dataOutput: DataOutput) : Unit = {} override def readFields(dataInput: DataInput) : Unit = {} @throws(classOf[IOException]) private def writeObject(stream: ObjectOutputStream): Unit = {} @throws(classOf[IOException]) private def readObject(stream: ObjectInputStream): Unit = {} } class GeometryUDT extends UserDefinedType[GeometryWritable] with Serializable { override def sqlType: DataType = ArrayType(ByteType) override def serialize(obj: Any): Array[Byte] = {} override def deserialize(datum: Any): GeometryWritable = {} override def userClass: Class[GeometryWritable] = classOf[GeometryWritable] } This is how I'm using it. val rdd = sc.textFile(args(0)).map( line => { val point = new Point point.setY(line.split(" ")(0).toDouble) point.setX(line.split(" ")(1).toDouble) Row.fromSeq(Seq(new GeometryWritable(point))) }) val schema = StructType(Seq(StructField("Geometry",new GeometryUDT, true))) val schemaRDD = sqlContext.applySchema(rdd, schema).persist(StorageLevel.MEMORY_AND_DISK) -- View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-UDT-Kryo-serialization-Unable-to-find-class-tp22101.html Sent from the Apache Spark User List mailing list archive at Nabble.com. - To unsubscribe, e-mail: user-unsubscr...@spark.apache.org For additional commands, e-mail: user-h...@spark.apache.org