What I am trying to achieve:

Trigger a query to get numbers (i.e., 1, 2, 3, ..., n), then,
for every number, trigger another three queries.
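Concretely, something like the sketch below is what I have in mind: run the
numbers query, pull the results back to the driver with toLocalIterator(),
and issue the follow-up queries per number from the driver (the session does
not seem to be usable inside foreach, per the trace in the quoted mail).
Table and column names here (number_table, num, t1, t2, t3) are placeholders,
not my real schema; spark is the same SparkSession as in the snippet below.

import java.util.Iterator;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// driver-side query that produces the numbers 1..n (placeholder table name)
Dataset<Row> numbers = spark.sql("select num from number_table");

// toLocalIterator() streams rows back to the driver one partition at a
// time, so the spark.sql(...) calls below run with a valid SparkSession
Iterator<Row> it = numbers.toLocalIterator();
while (it.hasNext()) {
    long n = it.next().getLong(0);
    // the three follow-up queries per number (placeholder queries)
    spark.sql("select * from t1 where num = " + n).count();
    spark.sql("select * from t2 where num = " + n).count();
    spark.sql("select * from t3 where num = " + n).count();
}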

Thanks,
Selvam R

On Wed, Oct 12, 2016 at 4:10 PM, Selvam Raman <sel...@gmail.com> wrote:

> Hi,
>
> I am reading a parquet file and creating a temp table. When I execute the
> query outside of the foreach function it works fine, but it throws a
> NullPointerException inside the DataFrame's foreach function.
>
> code snippet:
>
> String CITATION_QUERY = "select c.citation_num, c.title, c.publisher from test c";
>
> Dataset<Row> citation_query = spark.sql(CITATION_QUERY);
> System.out.println("mistery:" + citation_query.count());
>
> // Iterator<Row> iterofresulDF = resultDF.toLocalIterator();
>
> // resultDF is another Dataset<Row> defined earlier (not shown here)
> resultDF.foreach(new ForeachFunction<Row>() {
>     private static final long serialVersionUID = 1L;
>
>     public void call(Row line) {
>         // this is where the NullPointerException below is thrown
>         Dataset<Row> row = spark.sql(CITATION_QUERY);
>         System.out.println("mistery row count:" + row.count());
>     }
> });
>
>
> Error trace:
>
> 16/10/12 15:59:53 INFO CodecPool: Got brand-new decompressor [.snappy]
> 16/10/12 15:59:53 ERROR Executor: Exception in task 0.0 in stage 5.0 (TID 5)
> java.lang.NullPointerException
> at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:112)
> at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:110)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582)
> at com.elsevier.datasearch.ExecuteSQL.executeQuery(ExecuteSQL.java:11)
> at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:53)
> at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:1)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply(Dataset.scala:2118)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply(Dataset.scala:2118)
> at scala.collection.Iterator$class.foreach(Iterator.scala:893)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:894)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:894)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
> at org.apache.spark.scheduler.Task.run(Task.scala:86)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
>
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
> at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at scala.Option.foreach(Option.scala:257)
> at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
> at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1930)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:894)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:892)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
> at org.apache.spark.rdd.RDD.foreach(RDD.scala:892)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply$mcV$sp(Dataset.scala:2108)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply(Dataset.scala:2108)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$1.apply(Dataset.scala:2108)
> at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
> at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546)
> at org.apache.spark.sql.Dataset.foreach(Dataset.scala:2107)
> at org.apache.spark.sql.Dataset.foreach(Dataset.scala:2118)
> at com.elsevier.datasearch.ProcessPetDB.main(ProcessPetDB.java:46)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:497)
> at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
> at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
> at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
> at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
> Caused by: java.lang.NullPointerException
> at org.apache.spark.sql.SparkSession.sessionState$lzycompute(SparkSession.scala:112)
> at org.apache.spark.sql.SparkSession.sessionState(SparkSession.scala:110)
> at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582)
> at com.elsevier.datasearch.ExecuteSQL.executeQuery(ExecuteSQL.java:11)
> at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:53)
> at com.elsevier.datasearch.ProcessPetDB$1.call(ProcessPetDB.java:1)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply(Dataset.scala:2118)
> at org.apache.spark.sql.Dataset$$anonfun$foreach$2.apply(Dataset.scala:2118)
> at scala.collection.Iterator$class.foreach(Iterator.scala:893)
> at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:894)
> at org.apache.spark.rdd.RDD$$anonfun$foreach$1$$anonfun$apply$27.apply(RDD.scala:894)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1916)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70)
> at org.apache.spark.scheduler.Task.run(Task.scala:86)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
>
> 16/10/12 15:59:53 INFO SparkContext: Invoking stop() from shutdown hook
>
> Please let me know if I am missing anything. Thank you for the help.
>
> --
> Selvam Raman
> "Shun bribery; hold your head high"
>



-- 
Selvam Raman
"Shun bribery; hold your head high"
