[
https://issues.apache.org/jira/browse/MESOS-1535?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Ajay Viswanathan updated MESOS-1535:
------------------------------------
Description:
This is an error that I get while running fine-grained PySpark on the mesos
cluster. This comes after running some 200-1000 tasks generally.
Pyspark code:
while True:
sc.parallelize(range(10)).map(lambda n : n*2).collect()
Error log:
(In console)
ERROR DAGSchedulerActorSupervisor: eventProcesserActor failed due to the error
EOF reached before Python server acknowledged; shutting down SparkContext
org.apache.spark.SparkException: Job 75 cancelled as part of cancellation of
all jobs
at
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
at
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
at
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
at
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
at
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
at akka.dispatch.Mailbox.run(Mailbox.scala:218)
at
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
>>> 14/06/24 02:58:19 ERROR OneForOneStrategy:
java.lang.UnsupportedOperationException
at
org.apache.spark.scheduler.SchedulerBackend$class.killTask(SchedulerBackend.scala:32)
at
org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend.killTask(MesosSchedule
at
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
at
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
at
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
at
org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
at scala.Option.foreach(Option.scala:236)
at
org.apache.spark.scheduler.TaskSchedulerImpl.cancelTasks(TaskSchedulerImpl.scala:176)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
at
org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
at
org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
at
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
at
org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
at akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
at
akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
at akka.dispatch.Mailbox.run(Mailbox.scala:218)
at
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
was:
This is an error that I get while running fine-grained PySpark on the mesos
cluster. This comes after running some 200-1000 tasks generally.
Pyspark code:
while True:
sc.parallelize(range(10)).map(lambda n : n*2).collect()
Error log:
org.apache.spark.SparkException: EOF reached before Python server acknowledged
at
org.apache.spark.api.python.PythonAccumulatorParam.addInPlace(PythonR
DD.scala:322)
at
org.apache.spark.api.python.PythonAccumulatorParam.addInPlace(PythonR
DD.scala:293)
at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:70)
at
org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala
:275)
at
org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala
:273)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(
TraversableLike.scala:772)
at
scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.sca
la:98)
at
scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.sca
la:98)
at
scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala
:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.s
cala:771)
at org.apache.spark.Accumulators$.add(Accumulators.scala:273)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGSched
uler.scala:826)
at
org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.sca
la:601)
at
org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$ano
nfun$receive$1.applyOrElse(DAGScheduler.scala:190)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
at akka.actor.ActorCell.invoke(ActorCell.scala:456)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
at akka.dispatch.Mailbox.run(Mailbox.scala:219)
at
akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(Abst
ractDispatcher.scala:386)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool
.java:1339)
at
scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:19
79)
at
scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThre
ad.java:107)
> Pyspark on Mesos scheduler error
> --------------------------------
>
> Key: MESOS-1535
> URL: https://issues.apache.org/jira/browse/MESOS-1535
> Project: Mesos
> Issue Type: Bug
> Affects Versions: 0.18.0, 0.18.1
> Environment: Running a Mesos on a cluster of Centos 6.5 machines. 180
> GB memory.
> Reporter: Ajay Viswanathan
> Labels: pyspark
>
> This is an error that I get while running fine-grained PySpark on the mesos
> cluster. This comes after running some 200-1000 tasks generally.
> Pyspark code:
> while True:
> sc.parallelize(range(10)).map(lambda n : n*2).collect()
> Error log:
> (In console)
> ERROR DAGSchedulerActorSupervisor: eventProcesserActor failed due to the
> error EOF reached before Python server acknowledged; shutting down
> SparkContext
> org.apache.spark.SparkException: Job 75 cancelled as part of cancellation of
> all jobs
> at
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
> at
> org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
> at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
> at
> org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
> at
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
> at
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
> at
> akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
> at
> akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
> at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
> at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
> at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
> at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
> at akka.dispatch.Mailbox.run(Mailbox.scala:218)
> at
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
> at
> scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> at
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> at
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> at
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> >>> 14/06/24 02:58:19 ERROR OneForOneStrategy:
> java.lang.UnsupportedOperationException
> at
> org.apache.spark.scheduler.SchedulerBackend$class.killTask(SchedulerBackend.scala:32)
> at
> org.apache.spark.scheduler.cluster.mesos.MesosSchedulerBackend.killTask(MesosSchedule
> at
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
> at
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
> at
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3$$anonfun$apply$1.
> at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
> at
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
> at
> org.apache.spark.scheduler.TaskSchedulerImpl$$anonfun$cancelTasks$3.apply(TaskSchedul
> at scala.Option.foreach(Option.scala:236)
> at
> org.apache.spark.scheduler.TaskSchedulerImpl.cancelTasks(TaskSchedulerImpl.scala:176)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGSchedu
> at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
> at
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$fail
> at
> org.apache.spark.scheduler.DAGScheduler.handleJobCancellation(DAGScheduler.scala:998)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply$mcVI$sp(DAGS
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$doCancelAllJobs$1.apply(DAGScheduler
> at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
> at
> org.apache.spark.scheduler.DAGScheduler.doCancelAllJobs(DAGScheduler.scala:499)
> at
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
> at
> org.apache.spark.scheduler.DAGSchedulerActorSupervisor$$anonfun$2.applyOrElse(DAGSche
> at
> akka.actor.SupervisorStrategy.handleFailure(FaultHandling.scala:295)
> at
> akka.actor.dungeon.FaultHandling$class.handleFailure(FaultHandling.scala:253)
> at akka.actor.ActorCell.handleFailure(ActorCell.scala:338)
> at akka.actor.ActorCell.invokeAll$1(ActorCell.scala:423)
> at akka.actor.ActorCell.systemInvoke(ActorCell.scala:447)
> at akka.dispatch.Mailbox.processAllSystemMessages(Mailbox.scala:262)
> at akka.dispatch.Mailbox.run(Mailbox.scala:218)
> at
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.s
> at
> scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> at
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> at
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> at
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
--
This message was sent by Atlassian JIRA
(v6.2#6252)