What I am trying to achieve: trigger a query to get numbers (i.e., 1, 2, 3, ..., n), and for every number I have to trigger another 3 queries.
Thanks, selvam R On Wed, Oct 12, 2016 at 4:10 PM, Selvam Raman <sel...@gmail.com> wrote: > Hi , > > I am reading parquet file and creating temp table. when i am trying to > execute query outside of foreach function it is working fine. > throws nullpointerexception within data frame.foreach function. > > code snippet: > > String CITATION_QUERY = "select c.citation_num, c.title, c.publisher from > test c"; > > Dataset<Row> citation_query = spark.sql(CITATION_QUERY); > > System.out.println("mistery:"+citation_query.count()); > > > // Iterator<Row> iterofresulDF = resultDF.toLocalIterator(); > > > resultDF.foreach(new ForeachFunction<Row>() > > { > > private static final long serialVersionUID = 1L; > > public void call(Row line) > > { > > Dataset<Row> row = spark.sql(CITATION_QUERY); > > System.out.println("mistery row count:"+row.count()); > > } > > }); > > > Error trace: > > 16/10/12 15:59:53 INFO CodecPool: Got brand-new decompressor [.snappy] > > 16/10/12 15:59:53 ERROR Executor: Exception in task 0.0 in stage 5.0 (TID > 5) > > java.lang.NullPointerException > > at org.apache.spark.sql.SparkSession.sessionState$ > lzycompute(SparkSession.scala:112) > > at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:110) > > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582) > > at com.elsevier.datasearch.ExecuteSQL.executeQuery(ExecuteSQL.java:11) > > at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:53) > > at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:1) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply( > Dataset.scala:2118) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply( > Dataset.scala:2118) > > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$ > apply$27.apply(RDD.scala:894) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$ > 
apply$27.apply(RDD.scala:894) > > at org.apache.spark.SparkContext$$anonfun$runJob$5.apply( > SparkContext.scala:1916) > > at org.apache.spark.SparkContext$$anonfun$runJob$5.apply( > SparkContext.scala:1916) > > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > > at org.apache.spark.scheduler.Task.run(Task.scala:86) > > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > > at java.util.concurrent.ThreadPoolExecutor.runWorker( > ThreadPoolExecutor.java:1142) > > at java.util.concurrent.ThreadPoolExecutor$Worker.run( > ThreadPoolExecutor.java:617) > > at java.lang.Thread.run(Thread.java:745) > > > > > Driver stacktrace: > > at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$ > scheduler$DAGScheduler$$failJobAndIndependentStages( > DAGScheduler.scala:1454) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply( > DAGScheduler.scala:1442) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply( > DAGScheduler.scala:1441) > > at scala.collection.mutable.ResizableArray$class.foreach( > ResizableArray.scala:59) > > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > > at org.apache.spark.scheduler.DAGScheduler.abortStage( > DAGScheduler.scala:1441) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > handleTaskSetFailed$1.apply(DAGScheduler.scala:811) > > at org.apache.spark.scheduler.DAGScheduler$$anonfun$ > handleTaskSetFailed$1.apply(DAGScheduler.scala:811) > > at scala.Option.foreach(Option.scala:257) > > at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed( > DAGScheduler.scala:811) > > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. > doOnReceive(DAGScheduler.scala:1667) > > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. > onReceive(DAGScheduler.scala:1622) > > at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop. 
> onReceive(DAGScheduler.scala:1611) > > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > > at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632) > > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890) > > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903) > > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916) > > at org.apache.spark.SparkContext.runJob(SparkContext.scala:1930) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:894) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:892) > > at org.apache.spark.rdd.RDDOperationScope$.withScope( > RDDOperationScope.scala:151) > > at org.apache.spark.rdd.RDDOperationScope$.withScope( > RDDOperationScope.scala:112) > > at org.apache.spark.rdd.RDD.withScope(RDD.scala:358) > > at org.apache.spark.rdd.RDD.foreach(RDD.scala:892) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply$mcV$ > sp(Dataset.scala:2108) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply( > Dataset.scala:2108) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply( > Dataset.scala:2108) > > at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId( > SQLExecution.scala:57) > > at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546) > > at org.apache.spark.sql.Dataset.foreach(Dataset.scala:2107) > > at org.apache.spark.sql.Dataset.foreach(Dataset.scala:2118) > > at com.elsevier.datasearch.ProcessPetDB.main(ProcessPetDB.java:46) > > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > > at sun.reflect.NativeMethodAccessorImpl.invoke( > NativeMethodAccessorImpl.java:62) > > at sun.reflect.DelegatingMethodAccessorImpl.invoke( > DelegatingMethodAccessorImpl.java:43) > > at java.lang.reflect.Method.invoke(Method.java:497) > > at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$ > deploy$SparkSubmit$$runMain(SparkSubmit.scala:736) > > at 
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185) > > at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210) > > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124) > > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > > Caused by: java.lang.NullPointerException > > at org.apache.spark.sql.SparkSession.sessionState$ > lzycompute(SparkSession.scala:112) > > at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:110) > > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582) > > at com.elsevier.datasearch.ExecuteSQL.executeQuery(ExecuteSQL.java:11) > > at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:53) > > at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:1) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply( > Dataset.scala:2118) > > at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply( > Dataset.scala:2118) > > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$ > apply$27.apply(RDD.scala:894) > > at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$ > apply$27.apply(RDD.scala:894) > > at org.apache.spark.SparkContext$$anonfun$runJob$5.apply( > SparkContext.scala:1916) > > at org.apache.spark.SparkContext$$anonfun$runJob$5.apply( > SparkContext.scala:1916) > > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > > at org.apache.spark.scheduler.Task.run(Task.scala:86) > > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > > at java.util.concurrent.ThreadPoolExecutor.runWorker( > ThreadPoolExecutor.java:1142) > > at java.util.concurrent.ThreadPoolExecutor$Worker.run( > ThreadPoolExecutor.java:617) > > at java.lang.Thread.run(Thread.java:745) > > 16/10/12 15:59:53 INFO SparkContext: Invoking stop() from shutdown hook > > Please let me know if i am 
missing anything. Thank you for the help. > > -- > Selvam Raman > "லஞ்சம் தவிர்த்து நெஞ்சம் நிமிர்த்து" > -- Selvam Raman "லஞ்சம் தவிர்த்து நெஞ்சம் நிமிர்த்து"