Hi All,

 I am trying to use DataFrames (which contain data from Cassandra) inside
RDD.foreach. This throws the following exception:

Is CassandraSQLContext accessible from within an executor?

15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job
1443441160000 ms.0
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage
6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net): java.lang.NullPointerException
        at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134)
        at
org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74)
        at
org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77)
        at
com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170)
        at
com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
        at
com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
        at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
        at
org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503)
        at
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
        at org.apache.spark.scheduler.Task.run(Task.scala:64)
        at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
        at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191)
        at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
        at
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693)
        at scala.Option.foreach(Option.scala:236)
        at
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693)
        at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393)
        at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

Reply via email to