[ 
https://issues.apache.org/jira/browse/SPARK-17922?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

kanika dhuria updated SPARK-17922:
----------------------------------
    Description: 
I am using Spark 2.0.
I am seeing a class loading issue because whole-stage code generation is 
generating multiple classes with the same name, 
"org.apache.spark.sql.catalyst.expressions.GeneratedClass".
I am using DataFrame transform, and within the transform I use OSGi.
OSGi replaces the thread context class loader with ContextFinder, which looks 
at all the class loaders on the stack to find the newly generated class. It 
finds the byte class loader of the GeneratedClass with inner class 
GeneratedIterator (instead of falling back to the byte class loader created by 
the Janino compiler). Since the class name is the same, that byte class loader 
loads the class and returns GeneratedClass$GeneratedIterator instead of the 
expected GeneratedClass$UnsafeProjection.

Can we generate different classes with different names, or is it expected to 
generate only one class?
This is the rough repro:

import org.apache.spark.sql._
import org.apache.spark.sql.types._
import com.databricks.spark.avro._

  // Builds the per-partition function that the repro passes to mapPartitions.
  // The returned function maps an Iterator[Row] to an Iterator[Row] in which
  // each output Row holds only column 0 of the corresponding input Row.
  // Note: the `out` schema parameter is accepted but never read in this body.
  def exePart(out:StructType): ((Iterator[Row]) => Iterator[Row]) = {
//Initialize osgi
// NOTE(review): the line above is only a placeholder comment; the actual OSGi
// initialization is not shown in this snippet.
     (rows:Iterator[Row]) => {
         // Accumulator: starts empty and is extended by one single-element
         // iterator per input row.
         var outi = Iterator[Row]() 
         // The input iterator is drained completely here, before the result
         // is returned to the caller.
         while(rows.hasNext) {    
             val r = rows.next         
             // Keep only the first field of the input row.
             outi = outi.++(Iterator(Row(r.get(0))))          
         } 
         //val ors = Row("abc")               
         //outi =outi.++( Iterator(ors))  
         outi
     }
  }

// Builds a DataFrame => DataFrame function suitable for DataFrame.transform.
// The returned function converts the input to an RDD, runs exePart(outType)
// over each partition, and wraps the result in a new DataFrame that uses
// `outType` as its schema.
def transform1( outType:StructType) :((DataFrame) => DataFrame) = {
     (d:DataFrame) => {
      // Input schema is captured here but not used afterwards in this block.
      val inType = d.schema
      val rdd = d.rdd.mapPartitions(exePart(outType))
      // Re-create a DataFrame from the row RDD with the caller-supplied schema.
      d.sqlContext.createDataFrame(rdd, outType)
    }
   
  }

// Driver statements for the repro: read an Avro file, project one column,
// push it through transform1, register the result as a temp view, and insert
// from that view into a table.
val df = spark.read.avro("file:///data/builds/a1.avro")
// NOTE(review): filter(false) presumably aims at an empty result set; confirm
// that a bare Boolean is accepted by filter in the Spark version used.
val df1 = df.select($"id2").filter(false)
// NOTE(review): df2 is bound to whatever createOrReplaceTempView returns, not
// to the transformed DataFrame; only the "tbl0" view is used below.
val df2 = df1.transform(transform1(StructType(StructField("p1", IntegerType, 
true)::Nil))).createOrReplaceTempView("tbl0")

// Reads from the temp view registered above and writes into "testtable".
spark.sql("insert overwrite table testtable select p1 from tbl0")


  was:
I am using spark 2.0
Seeing class loading issue because the whole stage code gen is generating 
multiple classes with same name as 
"org.apache.spark.sql.catalyst.expressions.GeneratedClass"
I am using dataframe transform. and within transform i use Osgi.
Osgi replaces the thread context class loader to ContextFinder which looks at 
all the class loaders in the stack to find out the new generated class and 
finds the GeneratedClass with inner class GeneratedIterator byteclass 
loader(instead of falling back to the byte class loader created by janino 
compiler), since the class name is same that byte class loader loads the class 
and returns GeneratedClass$GeneratedIterator instead of expected 
GeneratedClass$UnsafeProjection.



> ClassCastException java.lang.ClassCastException: 
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator 
> cannot be cast to org.apache.spark.sql.catalyst.expressions.UnsafeProjection 
> ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-17922
>                 URL: https://issues.apache.org/jira/browse/SPARK-17922
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.0.0
>            Reporter: kanika dhuria
>
> I am using spark 2.0
> Seeing class loading issue because the whole stage code gen is generating 
> multiple classes with same name as 
> "org.apache.spark.sql.catalyst.expressions.GeneratedClass"
> I am using dataframe transform. and within transform i use Osgi.
> Osgi replaces the thread context class loader to ContextFinder which looks at 
> all the class loaders in the stack to find out the new generated class and 
> finds the GeneratedClass with inner class GeneratedIterator byteclass 
> loader(instead of falling back to the byte class loader created by janino 
> compiler), since the class name is same that byte class loader loads the 
> class and returns GeneratedClass$GeneratedIterator instead of expected 
> GeneratedClass$UnsafeProjection.
> Can we generate different classes with different names or is it expected to 
> generate one class only.
> This is the rough repro
> import org.apache.spark.sql._
> import org.apache.spark.sql.types._
> import com.databricks.spark.avro._
>   def exePart(out:StructType): ((Iterator[Row]) => Iterator[Row]) = {
> //Initialize osgi
>      (rows:Iterator[Row]) => {
>          var outi = Iterator[Row]() 
>          while(rows.hasNext) {    
>              val r = rows.next         
>              outi = outi.++(Iterator(Row(r.get(0))))          
>          } 
>          //val ors = Row("abc")               
>          //outi =outi.++( Iterator(ors))  
>          outi
>      }
>   }
> def transform1( outType:StructType) :((DataFrame) => DataFrame) = {
>      (d:DataFrame) => {
>       val inType = d.schema
>       val rdd = d.rdd.mapPartitions(exePart(outType))
>       d.sqlContext.createDataFrame(rdd, outType)
>     }
>    
>   }
> val df = spark.read.avro("file:///data/builds/a1.avro")
> val df1 = df.select($"id2").filter(false)
> val df2 = df1.transform(transform1(StructType(StructField("p1", IntegerType, 
> true)::Nil))).createOrReplaceTempView("tbl0")
> spark.sql("insert overwrite table testtable select p1 from tbl0")



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to