itholic commented on code in PR #39394:
URL: https://github.com/apache/spark/pull/39394#discussion_r1063122169
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala:
##########
@@ -784,7 +784,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
   def taskFailedWhileWritingRowsError(cause: Throwable): Throwable = {
     new SparkException(
-      errorClass = "_LEGACY_ERROR_TEMP_2054",
+      errorClass = "TASK_WRITE_FAILED",
       messageParameters = Map("message" -> cause.getMessage),
       cause = cause)
Review Comment:
The message is buried in the "Caused by" stack, but it can still be found in the Caused by exception log.
The message may include the file path, which can be helpful information for users, so I kept it.
For example:
**Without message**
```
org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows.
    at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:789)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:400)
    at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:89)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:888)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:888)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
    at org.apache.spark.scheduler.Task.run(Task.scala:139)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1502)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: file:/Users/haejoon.lee/Desktop/git_store/spark/spark-warehouse/org.apache.spark.sql.sources.InsertSuite/t/_temporary/0/_temporary/attempt_202301052119095866466288587020303_0002_m_000000_3/part1=1/part-00000-9d2e7095-124e-4004-9db9-bddb87197393.c000.snappy.parquet already exists
    at org.apache.spark.sql.sources.FileExistingTestFileSystem.create(InsertSuite.scala:2331)
    at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
    at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:347)
    at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:314)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:480)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetUtils$$anon$1.newInstance(ParquetUtils.scala:490)
    at org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter.renewCurrentWriter(FileFormatDataWriter.scala:298)
    at org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter.write(FileFormatDataWriter.scala:365)
    at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85)
    at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:383)
    at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1536)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:390)
    ... 15 more
```
**With message**
```
org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows.
file:/Users/haejoon.lee/Desktop/git_store/spark/spark-warehouse/org.apache.spark.sql.sources.InsertSuite/t/_temporary/0/_temporary/attempt_202301052119095866466288587020303_0002_m_000000_3/part1=1/part-00000-9d2e7095-124e-4004-9db9-bddb87197393.c000.snappy.parquet already exists
    at org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:789)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:400)
    at org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:89)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:888)
    at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:888)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:328)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92)
    at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161)
    at org.apache.spark.scheduler.Task.run(Task.scala:139)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1502)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557)
    at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
    at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
    at java.base/java.lang.Thread.run(Thread.java:829)
Caused by: org.apache.hadoop.fs.FileAlreadyExistsException: file:/Users/haejoon.lee/Desktop/git_store/spark/spark-warehouse/org.apache.spark.sql.sources.InsertSuite/t/_temporary/0/_temporary/attempt_202301052119095866466288587020303_0002_m_000000_3/part1=1/part-00000-9d2e7095-124e-4004-9db9-bddb87197393.c000.snappy.parquet already exists
    at org.apache.spark.sql.sources.FileExistingTestFileSystem.create(InsertSuite.scala:2331)
    at org.apache.parquet.hadoop.util.HadoopOutputFile.create(HadoopOutputFile.java:74)
    at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:347)
    at org.apache.parquet.hadoop.ParquetFileWriter.<init>(ParquetFileWriter.java:314)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:480)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:420)
    at org.apache.parquet.hadoop.ParquetOutputFormat.getRecordWriter(ParquetOutputFormat.java:409)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetOutputWriter.<init>(ParquetOutputWriter.scala:36)
    at org.apache.spark.sql.execution.datasources.parquet.ParquetUtils$$anon$1.newInstance(ParquetUtils.scala:490)
    at org.apache.spark.sql.execution.datasources.BaseDynamicPartitionDataWriter.renewCurrentWriter(FileFormatDataWriter.scala:298)
    at org.apache.spark.sql.execution.datasources.DynamicPartitionDataSingleWriter.write(FileFormatDataWriter.scala:365)
    at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85)
    at org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$1(FileFormatWriter.scala:383)
    at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1536)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:390)
    ... 15 more
```
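To see the difference locally without Spark, here is a minimal Scala sketch of the wrapping behavior. It deliberately bypasses Spark's `SparkException`/error-class machinery, so `TaskWriteFailedDemo`, the plain `RuntimeException` stand-in, and the sample path are all hypothetical:
```scala
// Minimal sketch (not Spark's actual error-class rendering): shows why
// inlining cause.getMessage surfaces the failing file path in the
// top-level message instead of only in the "Caused by" section.
object TaskWriteFailedDemo {
  def taskFailedWhileWritingRows(cause: Throwable): Throwable =
    // Stand-in for SparkException(errorClass, messageParameters, cause)
    new RuntimeException(
      s"[TASK_WRITE_FAILED] Task failed while writing rows.\n${cause.getMessage}",
      cause)

  def main(args: Array[String]): Unit = {
    val cause = new java.io.IOException(
      "file:/tmp/warehouse/t/part-00000.c000.snappy.parquet already exists")
    val wrapped = taskFailedWhileWritingRows(cause)
    // The top-level message now carries the file path up front:
    println(wrapped.getMessage)
    // The original exception remains reachable via the cause chain:
    println(wrapped.getCause.getMessage)
  }
}
```
Running `main` prints the file path as part of the top-level message, mirroring the "With message" trace above, while the cause chain still holds the original exception.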
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]