Hi, I am getting "Py4JJavaError: An error occurred while calling o545.save" error while executing below code.
myDF = spark.read.format("csv")\ .option("header",True)\ .option("mode","FAILFAST")\ .schema(myManualSchema)\ .load("C:\\Arnab\\Spark\\data\\2015-summary.csv") *myDF.write.format("json").mode("overwrite").save("C:\\Arnab\\Spark\\data\\tmp\\my_json_file")* *Note:- "2015-summary.csv" is 8KB and I have 8GB RAM. No other application is running while I run this code.* Stack Trace - ---------------------------------------------------------------------------Py4JJavaError Traceback (most recent call last)<ipython-input-23-a7940ceaf77d> in <module>----> 1 myDF.write.format("json").mode("overwrite").save("C:\\Arnab\\Spark\\data\\tmp\\my_json_file") C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\readwriter.py in save(self, path, format, mode, partitionBy, **options) 767 self._jwrite.save() 768 else:--> 769 self._jwrite.save(path) 770 771 @since(1.4) C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\java_gateway.py in __call__(self, *args) 1284 answer = self.gateway_client.send_command(command) 1285 return_value = get_return_value(-> 1286 answer, self.gateway_client, self.target_id, self.name) 1287 1288 for temp_arg in temp_args: C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\utils.py in deco(*a, **kw) 96 def deco(*a, **kw): 97 try:---> 98 return f(*a, **kw) 99 except py4j.protocol.Py4JJavaError as e: 100 converted = convert_exception(e.java_exception) C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) 326 raise Py4JJavaError( 327 "An error occurred while calling {0}{1}{2}.\n".--> 328 format(target_id, ".", name), value) 329 else: 330 raise Py4JError( Py4JJavaError: An error occurred while calling o799.save. : org.apache.spark.SparkException: Job aborted. Full Stack Trace is attached. Thanks in advance for your help. Regards Arnab
--------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) <ipython-input-23-a7940ceaf77d> in <module> ----> 1 myDF.write.format("json").mode("overwrite").save("C:\\Arnab\\Spark\\data\\tmp\\my_json_file") C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\readwriter.py in save(self, path, format, mode, partitionBy, **options) 767 self._jwrite.save() 768 else: --> 769 self._jwrite.save(path) 770 771 @since(1.4) C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\java_gateway.py in __call__(self, *args) 1284 answer = self.gateway_client.send_command(command) 1285 return_value = get_return_value( -> 1286 answer, self.gateway_client, self.target_id, self.name) 1287 1288 for temp_arg in temp_args: C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\utils.py in deco(*a, **kw) 96 def deco(*a, **kw): 97 try: ---> 98 return f(*a, **kw) 99 except py4j.protocol.Py4JJavaError as e: 100 converted = convert_exception(e.java_exception) C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) 326 raise Py4JJavaError( 327 "An error occurred while calling {0}{1}{2}.\n". --> 328 format(target_id, ".", name), value) 329 else: 330 raise Py4JError( Py4JJavaError: An error occurred while calling o799.save. : org.apache.spark.SparkException: Job aborted. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:226) at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:178) at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104) at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102) at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:123) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:110) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:109) at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:828) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$4(SQLExecution.scala:100) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:87) at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:828) at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:309) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:236) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source) at java.lang.reflect.Method.invoke(Unknown Source) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Unknown Source) Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 8.0 failed 1 times, most recent failure: Lost task 0.0 in stage 8.0 (TID 8, 192.168.0.15, executor driver): org.apache.spark.SparkException: Task failed while writing rows. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:291) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:127) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444) at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) at java.lang.Thread.run(Unknown Source) Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method) at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645) at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230) at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1435) at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:493) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1868) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1910) at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:678) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:484) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:597) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:560) at org.apache.spark.mapred.SparkHadoopMapRedUtil$.performCommit$1(SparkHadoopMapRedUtil.scala:50) at org.apache.spark.mapred.SparkHadoopMapRedUtil$.commitTask(SparkHadoopMapRedUtil.scala:77) at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitTask(HadoopMapReduceCommitProtocol.scala:245) at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.commit(FileFormatDataWriter.scala:79) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:275) at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1411) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:281) ... 9 more Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1989) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1977) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1976) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1976) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:956) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:956) at scala.Option.foreach(Option.scala:407) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:956) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2206) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2155) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2144) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:758) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2116) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:195) ... 30 more Caused by: org.apache.spark.SparkException: Task failed while writing rows. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:291) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:127) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444) at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source) at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source) ... 1 more Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method) at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645) at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230) at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1435) at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:493) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1868) at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1910) at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:678) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:484) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:597) at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:560) at org.apache.spark.mapred.SparkHadoopMapRedUtil$.performCommit$1(SparkHadoopMapRedUtil.scala:50) at org.apache.spark.mapred.SparkHadoopMapRedUtil$.commitTask(SparkHadoopMapRedUtil.scala:77) at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitTask(HadoopMapReduceCommitProtocol.scala:245) at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.commit(FileFormatDataWriter.scala:79) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:275) at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1411) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:281) ... 9 more
--------------------------------------------------------------------- To unsubscribe e-mail: user-unsubscr...@spark.apache.org