Re: CassandraSQLContext throwing NullPointer Exception
Experts on spark-connector-user mailing list should be able to chime in. My reply to that list bounced since I didn't subscribe to that list. FYI On Mon, Sep 28, 2015 at 10:07 AM, Priya Ch wrote: > Ted, > > I am using spark 1.3.0 and running the code in YARN mode. > > Here is the code.. > > object streaming { > def main(args:Array[String]) > { > val conf = new SparkConf > conf.setMaster("yarn-client") > conf.setAppName("SimpleApp") > conf.set("spark.cassandra.connection.host","") > > val sc = new SparkContext(conf) > val sqlContext = new CassandraSQLContext(sc) > > val dstream = KafkaUtils.createStream() > val words = dstream.flatMap(line => line.split(",")) > val wordPairs = words.map(word =>(word,1)) > val reducedStream = wordPairs.reduceByKey((a,b) => a+b) > > reducedStream.foreachRDD{ > rdd => rdd.foreach{ > case(key,value) => val df = getDataFrame(sqlContext, key) > df.save("org.apache.spark.cassandra",SaveMode.Overwrite, Map("c_table" -> > "table1","keyspace" -> "test")) > } > } > } > > def getDataFrame(cqlContext:CassandraSQLContext, key:String):DataFrame = > cqlContext.sql("select word,count from wordcount where word = '"+key+"'") > } > > In the above code, the method getDataFrame is throwing Null Pointer > Exception at cqlContext.sql line. > > On Mon, Sep 28, 2015 at 6:54 PM, Ted Yu wrote: > >> Which Spark release are you using ? >> >> Can you show the snippet of your code around CassandraSQLContext#sql() ? >> >> Thanks >> >> On Mon, Sep 28, 2015 at 6:21 AM, Priya Ch >> wrote: >> >>> Hi All, >>> >>> I am trying to use dataframes (which contain data from cassandra) in >>> rdd.foreach. 
This is throwing the following exception: >>> >>> Is CassandraSQLContext accessible within executor >>> >>> 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job >>> 144344116 ms.0 >>> org.apache.spark.SparkException: Job aborted due to stage failure: Task >>> 0 in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage >>> 6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net): >>> java.lang.NullPointerException >>> at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134) >>> at >>> org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74) >>> at >>> org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77) >>> at >>> com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170) >>> at >>> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) >>> at >>> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) >>> at scala.collection.Iterator$class.foreach(Iterator.scala:727) >>> at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) >>> at >>> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) >>> at >>> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) >>> at >>> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) >>> at >>> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) >>> at >>> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) >>> at org.apache.spark.scheduler.Task.run(Task.scala:64) >>> at >>> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) >>> at >>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >>> at >>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >>> at java.lang.Thread.run(Thread.java:745) >>> >>> Driver stacktrace: >>> at org.apache.spark.scheduler.DAGScheduler.org >>> 
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203) >>> at >>> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192) >>> at >>> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191) >>> at >>> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) >>> at >>> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) >>> at >>> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191) >>> at >>> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) >>> at >>> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) >>> at scala.Option.foreach(Option.scala:236) >>> at >>> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693) >>> at >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393) >>> at >>> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGSche
Re: CassandraSQLContext throwing NullPointer Exception
Ted, I am using spark 1.3.0 and running the code in YARN mode. Here is the code.. object streaming { def main(args:Array[String]) { val conf = new SparkConf conf.setMaster("yarn-client") conf.setAppName("SimpleApp") conf.set("spark.cassandra.connection.host","") val sc = new SparkContext(conf) val sqlContext = new CassandraSQLContext(sc) val dstream = KafkaUtils.createStream() val words = dstream.flatMap(line => line.split(",")) val wordPairs = words.map(word =>(word,1)) val reducedStream = wordPairs.reduceByKey((a,b) => a+b) reducedStream.foreachRDD{ rdd => rdd.foreach{ case(key,value) => val df = getDataFrame(sqlContext, key) df.save("org.apache.spark.cassandra",SaveMode.Overwrite, Map("c_table" -> "table1","keyspace" -> "test")) } } } def getDataFrame(cqlContext:CassandraSQLContext, key:String):DataFrame = cqlContext.sql("select word,count from wordcount where word = '"+key+"'") } In the above code, the method getDataFrame is throwing Null Pointer Exception at cqlContext.sql line. On Mon, Sep 28, 2015 at 6:54 PM, Ted Yu wrote: > Which Spark release are you using ? > > Can you show the snippet of your code around CassandraSQLContext#sql() ? > > Thanks > > On Mon, Sep 28, 2015 at 6:21 AM, Priya Ch > wrote: > >> Hi All, >> >> I am trying to use dataframes (which contain data from cassandra) in >> rdd.foreach. 
This is throwing the following exception: >> >> Is CassandraSQLContext accessible within executor >> >> 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job >> 144344116 ms.0 >> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 >> in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage >> 6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net): >> java.lang.NullPointerException >> at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134) >> at >> org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74) >> at >> org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77) >> at >> com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170) >> at >> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) >> at >> com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) >> at scala.collection.Iterator$class.foreach(Iterator.scala:727) >> at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) >> at >> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) >> at >> org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) >> at >> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) >> at >> org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) >> at >> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) >> at org.apache.spark.scheduler.Task.run(Task.scala:64) >> at >> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) >> at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) >> at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) >> at java.lang.Thread.run(Thread.java:745) >> >> Driver stacktrace: >> at org.apache.spark.scheduler.DAGScheduler.org >> 
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203) >> at >> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192) >> at >> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191) >> at >> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) >> at >> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) >> at >> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191) >> at >> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) >> at >> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) >> at scala.Option.foreach(Option.scala:236) >> at >> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693) >> at >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393) >> at >> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354) >> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) >> >> >
Re: CassandraSQLContext throwing NullPointer Exception
Which Spark release are you using ? Can you show the snippet of your code around CassandraSQLContext#sql() ? Thanks On Mon, Sep 28, 2015 at 6:21 AM, Priya Ch wrote: > Hi All, > > I am trying to use dataframes (which contain data from cassandra) in > rdd.foreach. This is throwing the following exception: > > Is CassandraSQLContext accessible within executor > > 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job > 144344116 ms.0 > org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 > in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage > 6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net): > java.lang.NullPointerException > at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134) > at > org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74) > at > org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77) > at > com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170) > at > com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) > at > com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) > at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) > at > org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) > at > org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) > at > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) > at org.apache.spark.scheduler.Task.run(Task.scala:64) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > > Driver stacktrace: > at org.apache.spark.scheduler.DAGScheduler.org > $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > >
CassandraSQLContext throwing NullPointer Exception
Hi All, I am trying to use dataframes (which contain data from cassandra) in rdd.foreach. This is throwing the following exception: Is CassandraSQLContext accessible within executor 15/09/28 17:22:40 ERROR JobScheduler: Error running job streaming job 144344116 ms.0 org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage 6.0 (TID 83, blrrndnbudn05.rnd.amadeus.net): java.lang.NullPointerException at org.apache.spark.sql.SQLContext.parseSql(SQLContext.scala:134) at org.apache.spark.sql.cassandra.CassandraSQLContext.cassandraSql(CassandraSQLContext.scala:74) at org.apache.spark.sql.cassandra.CassandraSQLContext.sql(CassandraSQLContext.scala:77) at com.amadeus.spark.example.SparkDemo$.withDataFrames(SparkDemo.scala:170) at com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) at com.amadeus.spark.example.SparkDemo$$anonfun$main$1$$anonfun$apply$1.apply(SparkDemo.scala:158) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:798) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1503) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:64) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org 
$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1203) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1192) at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1191) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1191) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:693) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:693) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1393) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1354) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)