[ https://issues.apache.org/jira/browse/SPARK-9638?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen resolved SPARK-9638. ------------------------------ Resolution: Duplicate > .save() Procedure fails > ----------------------- > > Key: SPARK-9638 > URL: https://issues.apache.org/jira/browse/SPARK-9638 > Project: Spark > Issue Type: Bug > Components: MLlib, PySpark > Affects Versions: 1.4.1 > Reporter: Stijn Geuens > > I am not able to save a MatrixFactorizationModel I created. > Path "./Models" exists. > Working with pyspark in IPython notebook (spark version = 1.4.1, hadoop > version = 2.6) > Error message: > --------------------------------------------------------------------------- > Py4JJavaError Traceback (most recent call last) > <ipython-input-14-28d4a0d852bb> in <module>() > ----> 1 CFMFModel11.save(sc, "./Models/CFMFModel11") > C:\Users\s.geuens\Spark\spark-1.4.1-bin-hadoop2.6\python\pyspark\mllib\util.pyc > in save(self, sc, path) > 202 > 203 def save(self, sc, path): > --> 204 self._java_model.save(sc._jsc.sc(), path) > 205 > 206 > C:\Users\s.geuens\Spark\spark-1.4.1-bin-hadoop2.6\python\lib\py4j-0.8.2.1-src.zip\py4j\java_gateway.py > in __call__(self, *args) > 536 answer = self.gateway_client.send_command(command) > 537 return_value = get_return_value(answer, self.gateway_client, > --> 538 self.target_id, self.name) > 539 > 540 for temp_arg in temp_args: > C:\Users\s.geuens\Spark\spark-1.4.1-bin-hadoop2.6\python\lib\py4j-0.8.2.1-src.zip\py4j\protocol.py > in get_return_value(answer, gateway_client, target_id, name) > 298 raise Py4JJavaError( > 299 'An error occurred while calling {0}{1}{2}.\n'. > --> 300 format(target_id, '.', name), value) > 301 else: > 302 raise Py4JError( > Py4JJavaError: An error occurred while calling o334.save. > : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 > in stage 1823.0 failed 1 times, most recent failure: Lost task 0.0 in stage > 1823.0 (TID 489, localhost): java.lang.NullPointerException > at java.lang.ProcessBuilder.start(ProcessBuilder.java:1010) > at org.apache.hadoop.util.Shell.runCommand(Shell.java:482) > at org.apache.hadoop.util.Shell.run(Shell.java:455) > at > org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:715) > at org.apache.hadoop.util.Shell.execCommand(Shell.java:808) > at org.apache.hadoop.util.Shell.execCommand(Shell.java:791) > at > org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:656) > at > org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:490) > at > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:462) > at > org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:428) > at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:908) > at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:801) > at > org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123) > at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:90) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1104) > at > org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1095) > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) > at org.apache.spark.scheduler.Task.run(Task.scala:70) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:745) > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) > at scala.Option.foreach(Option.scala:236) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418) > at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org