Hi Deenar, this is a known issue: dependencies added with the --jars option are loaded only into the spark interpreter, not into the pyspark or spark sql interpreters. As an alternative, Zeppelin provides another way of loading dependencies [1], which at the moment is supported only in the dev branch. Could you please try this out with the latest master branch?
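For reference, the dynamic dependency loading described in [1] looks roughly like the following notebook paragraph. This is just a sketch: the spark-avro coordinates and version below are placeholders, so substitute whatever artifact you currently pass via --jars. Note that a %dep paragraph must run before the spark interpreter has started (restart the interpreter first if needed):

    %dep
    // clear any previously loaded artifacts, then pull the jar from the
    // Maven repository; the artifact becomes visible to the spark,
    // pyspark and sql interpreters alike
    z.reset()
    z.load("com.databricks:spark-avro_2.10:2.0.1")

After that, a %sql paragraph (e.g. %sql select * from avroTable) should be able to resolve the Avro classes the same way sqlContext.sql does.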
[1] https://zeppelin.incubator.apache.org/docs/0.6.0-incubating-SNAPSHOT/manual/dependencymanagement.html

On Fri, Feb 26, 2016 at 1:28 AM, Deenar Toraskar <deenar.toras...@gmail.com> wrote:

> Hi
>
> I have some avro tables and the spark-avro jars referenced in the --jars.
> I can query the table successfully using sqlContext.sql("select * from
> avroTable"), but get the following error when doing it using %sql select *
> from avroTable. Any assistance would be appreciated.
>
> Deenar
>
> sqlContext.sql("select * from avroTable")
> java.io.IOException: Class not found
>     at org.apache.xbean.asm5.ClassReader.a(Unknown Source)
>     at org.apache.xbean.asm5.ClassReader.<init>(Unknown Source)
>     at org.apache.spark.util.ClosureCleaner$.getClassReader(ClosureCleaner.scala:40)
>     at org.apache.spark.util.ClosureCleaner$.getInnerClosureClasses(ClosureCleaner.scala:81)
>     at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:187)
>     at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122)
>     at org.apache.spark.SparkContext.clean(SparkContext.scala:2055)
>     at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:324)
>     at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:323)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
>     at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
>     at org.apache.spark.rdd.RDD.map(RDD.scala:323)
>     at com.databricks.spark.avro.AvroRelation$$anonfun$buildScan$1.apply(AvroRelation.scala:125)
>     at com.databricks.spark.avro.AvroRelation$$anonfun$buildScan$1.apply(AvroRelation.scala:120)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>     at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
>     at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>     at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108)
>     at com.databricks.spark.avro.AvroRelation.buildScan(AvroRelation.scala:120)
>     at org.apache.spark.sql.sources.HadoopFsRelation.buildScan(interfaces.scala:762)
>     at org.apache.spark.sql.sources.HadoopFsRelation.buildScan(interfaces.scala:790)
>     at org.apache.spark.sql.sources.HadoopFsRelation.buildInternalScan(interfaces.scala:821)
>     at org.apache.spark.sql.sources.HadoopFsRelation.buildInternalScan(interfaces.scala:661)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$11$$anonfun$13.apply(DataSourceStrategy.scala:158)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$11$$anonfun$13.apply(DataSourceStrategy.scala:154)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
>     at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
>     at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>     at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:108)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$11.apply(DataSourceStrategy.scala:154)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$11.apply(DataSourceStrategy.scala:149)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$pruneFilterProject$1.apply(DataSourceStrategy.scala:274)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$$anonfun$pruneFilterProject$1.apply(DataSourceStrategy.scala:273)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProjectRaw(DataSourceStrategy.scala:352)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.pruneFilterProject(DataSourceStrategy.scala:269)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.buildPartitionedTableScan(DataSourceStrategy.scala:184)
>     at org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:91)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
>     at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
>     at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:349)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
>     at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>     at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59)
>     at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47)
>     at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45)
>     at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52)
>     at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52)
>     at org.apache.spark.sql.DataFrame.withCallback(DataFrame.scala:2134)
>     at org.apache.spark.sql.DataFrame.head(DataFrame.scala:1413)
>     at org.apache.spark.sql.DataFrame.take(DataFrame.scala:1495)
>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:497)
>     at org.apache.zeppelin.spark.ZeppelinContext.showDF(ZeppelinContext.java:301)
>     at org.apache.zeppelin.spark.SparkSqlInterpreter.interpret(SparkSqlInterpreter.java:144)
>     at org.apache.zeppelin.interpreter.ClassloaderInterpreter.interpret(ClassloaderInterpreter.java:57)
>     at org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:93)
>     at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:299)
>     at org.apache.zeppelin.scheduler.Job.run(Job.java:171)
>     at org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:139)
>     at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
>     at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>     at java.lang.Thread.run(Thread.java:745)