Re: Spark SQL UDT Kryo serialization, Unable to find class

2015-03-20 Thread Michael Armbrust
>>
>>> org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)
>>>at
>>>
>>> org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>>>at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>>>at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>>>at
>>>
>>> org.apache.spark.sql.execution.joins.HashedRelation$.apply(HashedRelation.scala:80)
>>>at
>>>
>>> org.apache.spark.sql.execution.joins.ShuffledHashJoin$$anonfun$execute$1.apply(ShuffledHashJoin.scala:46)
>>>at
>>>
>>> org.apache.spark.sql.execution.joins.ShuffledHashJoin$$anonfun$execute$1.apply(ShuffledHashJoin.scala:45)
>>>at
>>>
>>> org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:88)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>>>at
>>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>>>at
>>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at
>>> org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:61)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:228)
>>>at
>>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>>>at
>>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>>>at
>>> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>>>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>>>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>>>at
>>>
>>> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
>>>at
>>>
>>> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>>>at org.apache.spark.scheduler.Task.run(Task.scala:56)
>>>at
>>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
>>>at
>>>
>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>>>at
>>>
>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>>>at java.lang.Thread.run(Thread.java:745)
>>> *Caused by: java.lang.ClassNotFoundException:
>>> com.gis.io.GeometryWritable*
>>>at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
>>>at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
>>>at java.security.AccessController.doPrivileged(Native Method)
>>>at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
>>>at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
>>>at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>>>at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
>>>at java.lang.Class.forName0(Native Method)
>>>at java.lang.Class.forName(Class.java:270)
>>>at
>>>
>>> com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136)
>>>... 52 more
>>>
>>> Where /com.gis.io.GeometryWritable/ is my custom class for which I'm
>>> creating the UDT, and it is present in the app jar. I've tried, as discussed
>>> here
>>> <https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg>,
>>>  also
>>> changing the Spark default serializer from Kryo to Java, but it doesn't
>>> help.
>>> Any suggestions? Am I missing something? Following are my
>>> classes:
>>>
>>> @SQLUserDefinedType(udt = classOf[GeometryUDT])
>>> class GeometryWritable(var _geometry: Geometry) extends Writable with
>>> Serializable {
>>>

Re: Spark SQL UDT Kryo serialization, Unable to find class

2015-03-17 Thread Michael Armbrust
.scala:263)
>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>at
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
>at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:263)
>at org.apache.spark.rdd.RDD.iterator(RDD.scala:230)
>at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:68)
>at
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
>at org.apache.spark.scheduler.Task.run(Task.scala:56)
>at
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)
>at
>
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>at
>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>at java.lang.Thread.run(Thread.java:745)
> *Caused by: java.lang.ClassNotFoundException:
> com.gis.io.GeometryWritable*
>at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
>at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
>at java.security.AccessController.doPrivileged(Native Method)
>at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
>at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
>at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
>at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
>at java.lang.Class.forName0(Native Method)
>at java.lang.Class.forName(Class.java:270)
>at
>
> com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136)
>... 52 more
>
> Where /com.gis.io.GeometryWritable/ is my custom class for which I'm
> creating the UDT, and it is present in the app jar. I've tried, as discussed  here
> <https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg>,
>  also
> changing the Spark default serializer from Kryo to Java, but it doesn't help.
> Any suggestions? Am I missing something? Following are my
> classes:
>
> @SQLUserDefinedType(udt = classOf[GeometryUDT])
> class GeometryWritable(var _geometry: Geometry) extends Writable with
> Serializable {
>
>def geometry = _geometry
>def geometry_=(geometry: Geometry) = _geometry = geometry
>
>def this() = this(null)
>
>override def write(dataOutput: DataOutput) : Unit = {}
>override def readFields(dataInput: DataInput) : Unit = {}
>@throws(classOf[IOException])
>private def writeObject(stream: ObjectOutputStream): Unit = {}
>@throws(classOf[IOException])
>private def readObject(stream: ObjectInputStream): Unit = {}
> }
>
> class GeometryUDT extends UserDefinedType[GeometryWritable] with
> Serializable {
>
>   override def sqlType: DataType = ArrayType(ByteType)
>   override def serialize(obj: Any): Array[Byte] = {}
>   override def deserialize(datum: Any): GeometryWritable = {}
>   override def userClass: Class[GeometryWritable] =
> classOf[GeometryWritable]
>
>}
>
> This is how I'm using it.
>
>  val rdd = sc.textFile(args(0)).map(
>line => {
> val point = new Point
> point.setY(line.split(" ")(0).toDouble)
> point.setX(line.split(" ")(1).toDouble)
>Row.fromSeq(Seq(new GeometryWritable(point)))
>  })
>  val schema = StructType(Seq(StructField("Geometry",new GeometryUDT,
> true)))
>
>  val schemaRDD = sqlContext.applySchema(rdd,
> schema).persist(StorageLevel.MEMORY_AND_DISK)
>
>
>
> --
> View this message in context:
> http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-UDT-Kryo-serialization-Unable-to-find-class-tp22101.html
> Sent from the Apache Spark User List mailing list archive at Nabble.com.
>
> -
> To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
> For additional commands, e-mail: user-h...@spark.apache.org
>
>


Spark SQL UDT Kryo serialization, Unable to find class

2015-03-17 Thread zia_kayani
:1145)
   at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
   at java.lang.Thread.run(Thread.java:745)
*Caused by: java.lang.ClassNotFoundException:
com.gis.io.GeometryWritable*
   at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
   at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
   at java.security.AccessController.doPrivileged(Native Method)
   at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
   at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
   at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
   at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
   at java.lang.Class.forName0(Native Method)
   at java.lang.Class.forName(Class.java:270)
   at
com.esotericsoftware.kryo.util.DefaultClassResolver.readName(DefaultClassResolver.java:136)
   ... 52 more

Where /com.gis.io.GeometryWritable/ is my custom class for which I'm
creating the UDT, and it is present in the app jar. I've tried, as discussed  here
<https://groups.google.com/forum/#!topic/aureliusgraphs/iqDbeLJsGsg>,   also
changing the Spark default serializer from Kryo to Java, but it doesn't help.
Any suggestions? Am I missing something? Following are my
classes:

@SQLUserDefinedType(udt = classOf[GeometryUDT])
class GeometryWritable(var _geometry: Geometry) extends Writable with
Serializable {

   def geometry = _geometry
   def geometry_=(geometry: Geometry) = _geometry = geometry

   def this() = this(null)

   override def write(dataOutput: DataOutput) : Unit = {}
   override def readFields(dataInput: DataInput) : Unit = {}
   @throws(classOf[IOException])
   private def writeObject(stream: ObjectOutputStream): Unit = {}
   @throws(classOf[IOException])
   private def readObject(stream: ObjectInputStream): Unit = {}
}

class GeometryUDT extends UserDefinedType[GeometryWritable] with
Serializable {

  override def sqlType: DataType = ArrayType(ByteType)
  override def serialize(obj: Any): Array[Byte] = {}
  override def deserialize(datum: Any): GeometryWritable = {}
  override def userClass: Class[GeometryWritable] =
classOf[GeometryWritable]

   }

This is how I'm using it.

 val rdd = sc.textFile(args(0)).map(
   line => {
val point = new Point
point.setY(line.split(" ")(0).toDouble)
point.setX(line.split(" ")(1).toDouble)
   Row.fromSeq(Seq(new GeometryWritable(point)))
 })
 val schema = StructType(Seq(StructField("Geometry",new GeometryUDT,
true)))

 val schemaRDD = sqlContext.applySchema(rdd,
schema).persist(StorageLevel.MEMORY_AND_DISK)



--
View this message in context: 
http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-UDT-Kryo-serialization-Unable-to-find-class-tp22101.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

-
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org