Hi,

I am getting "Py4JJavaError: An error occurred while calling o545.save"
error while executing below code.

myDF = spark.read.format("csv") \
    .option("header", True) \
    .option("mode", "FAILFAST") \
    .schema(myManualSchema) \
    .load("C:\\Arnab\\Spark\\data\\2015-summary.csv")

myDF.write.format("json").mode("overwrite").save("C:\\Arnab\\Spark\\data\\tmp\\my_json_file")

Note: "2015-summary.csv" is only 8 KB and I have 8 GB of RAM. No other application is running while I run this code.
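
For reference, myManualSchema is built along these lines (a sketch only; the field names assume the usual three columns of 2015-summary.csv and may need adjusting):

from pyspark.sql.types import StructType, StructField, StringType, LongType

# Assumed column layout of 2015-summary.csv; adjust names/types to match the actual header.
myManualSchema = StructType([
    StructField("DEST_COUNTRY_NAME", StringType(), True),
    StructField("ORIGIN_COUNTRY_NAME", StringType(), True),
    StructField("count", LongType(), False)
])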
Stack trace (abridged):

Py4JJavaError: An error occurred while calling o799.save.
: org.apache.spark.SparkException: Job aborted.
[...]
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

The full stack trace is included at the end of this mail.

Thanks in advance for your help.

Regards
Arnab

---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
<ipython-input-23-a7940ceaf77d> in <module>
----> 1 myDF.write.format("json").mode("overwrite").save("C:\\Arnab\\Spark\\data\\tmp\\my_json_file")

C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\readwriter.py in save(self, path, format, mode, partitionBy, **options)
    767             self._jwrite.save()
    768         else:
--> 769             self._jwrite.save(path)
    770 
    771     @since(1.4)

C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\java_gateway.py in __call__(self, *args)
   1284         answer = self.gateway_client.send_command(command)
   1285         return_value = get_return_value(
-> 1286             answer, self.gateway_client, self.target_id, self.name)
   1287 
   1288         for temp_arg in temp_args:

C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\pyspark\sql\utils.py in deco(*a, **kw)
     96     def deco(*a, **kw):
     97         try:
---> 98             return f(*a, **kw)
     99         except py4j.protocol.Py4JJavaError as e:
    100             converted = convert_exception(e.java_exception)

C:\Arnab\Spark\spark-3.0.0-preview2-bin-hadoop3.2\python\lib\py4j-0.10.8.1-src.zip\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name)
    326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
--> 328                     format(target_id, ".", name), value)
    329             else:
    330                 raise Py4JError(

Py4JJavaError: An error occurred while calling o799.save.
: org.apache.spark.SparkException: Job aborted.
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:226)
        at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:178)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:104)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:102)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.doExecute(commands.scala:123)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:173)
        at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:211)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:208)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:169)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:110)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:109)
        at org.apache.spark.sql.DataFrameWriter.$anonfun$runCommand$1(DataFrameWriter.scala:828)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$4(SQLExecution.scala:100)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:87)
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:828)
        at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:309)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:293)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:236)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
        at java.lang.reflect.Method.invoke(Unknown Source)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
        at py4j.Gateway.invoke(Gateway.java:282)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:238)
        at java.lang.Thread.run(Unknown Source)
Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 8.0 failed 1 times, most recent failure: Lost task 0.0 in stage 8.0 (TID 8, 192.168.0.15, executor driver): org.apache.spark.SparkException: Task failed while writing rows.
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:291)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.run(Task.scala:127)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
        at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
        at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
        at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
        at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1435)
        at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:493)
        at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1868)
        at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1910)
        at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:678)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:484)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:597)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:560)
        at org.apache.spark.mapred.SparkHadoopMapRedUtil$.performCommit$1(SparkHadoopMapRedUtil.scala:50)
        at org.apache.spark.mapred.SparkHadoopMapRedUtil$.commitTask(SparkHadoopMapRedUtil.scala:77)
        at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitTask(HadoopMapReduceCommitProtocol.scala:245)
        at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.commit(FileFormatDataWriter.scala:79)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:275)
        at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1411)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:281)
        ... 9 more

Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1989)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1977)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1976)
        at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
        at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1976)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:956)
        at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:956)
        at scala.Option.foreach(Option.scala:407)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:956)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2206)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2155)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2144)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:758)
        at org.apache.spark.SparkContext.runJob(SparkContext.scala:2116)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:195)
        ... 30 more
Caused by: org.apache.spark.SparkException: Task failed while writing rows.
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:291)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
        at org.apache.spark.scheduler.Task.run(Task.scala:127)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
        ... 1 more
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
        at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
        at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:645)
        at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1230)
        at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1435)
        at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:493)
        at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1868)
        at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1910)
        at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:678)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.mergePaths(FileOutputCommitter.java:484)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:597)
        at org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter.commitTask(FileOutputCommitter.java:560)
        at org.apache.spark.mapred.SparkHadoopMapRedUtil$.performCommit$1(SparkHadoopMapRedUtil.scala:50)
        at org.apache.spark.mapred.SparkHadoopMapRedUtil$.commitTask(SparkHadoopMapRedUtil.scala:77)
        at org.apache.spark.internal.io.HadoopMapReduceCommitProtocol.commitTask(HadoopMapReduceCommitProtocol.scala:245)
        at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.commit(FileFormatDataWriter.scala:79)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:275)
        at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1411)
        at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:281)
        ... 9 more