This is an automated email from the ASF dual-hosted git repository.

lixiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4baa2d4  [SPARK-26673][FOLLOWUP][SQL] File Source V2: check existence of output path before delete it
4baa2d4 is described below

commit 4baa2d4449e103b15370d284b0ffdf09b4a9c1b7
Author: Gengliang Wang <gengliang.w...@databricks.com>
AuthorDate: Mon Feb 25 16:20:06 2019 -0800

    [SPARK-26673][FOLLOWUP][SQL] File Source V2: check existence of output path before delete it
    
    ## What changes were proposed in this pull request?
    This is a followup PR to resolve comment: https://github.com/apache/spark/pull/23601#pullrequestreview-207101115
    
    When Spark writes DataFrame with "overwrite" mode, it deletes the output path before actual writes. To safely handle the case that the output path doesn't exist, it is suggested to follow the V1 code by checking the existence.
    
    ## How was this patch tested?
    
    Apply https://github.com/apache/spark/pull/23836 and run unit tests
    
    Closes #23889 from gengliangwang/checkFileBeforeOverwrite.
    
    Authored-by: Gengliang Wang <gengliang.w...@databricks.com>
    Signed-off-by: gatorsmile <gatorsm...@gmail.com>
---
 .../apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
index 6a94248..75c9224 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriteBuilder.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.spark.sql.execution.datasources.v2
 
+import java.io.IOException
 import java.util.UUID
 
 import scala.collection.JavaConverters._
@@ -83,7 +84,9 @@ abstract class FileWriteBuilder(options: DataSourceOptions)
         null
 
       case SaveMode.Overwrite =>
-        committer.deleteWithJob(fs, path, true)
+        if (fs.exists(path) && !committer.deleteWithJob(fs, path, true)) {
+          throw new IOException(s"Unable to clear directory $path prior to writing to it")
+        }
         committer.setupJob(job)
         new FileBatchWrite(job, description, committer)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to