advancedxy commented on a change in pull request #25863: 
[WIP][SPARK-29037][CORE][SQL] For static partition overwrite, spark may give 
duplicate result.
URL: https://github.com/apache/spark/pull/25863#discussion_r327987988
 
 

 ##########
 File path: 
core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala
 ##########
 @@ -271,4 +318,59 @@ class HadoopMapReduceCommitProtocol(
         logWarning(s"Exception while aborting 
${taskContext.getTaskAttemptID}", e)
     }
   }
+
+  /**
+   * Delete the staging output path of current InsertIntoHadoopFsRelation 
operation.
+   */
+  private def deleteStagingInsertOutputPath(fs: FileSystem): Unit = {
+    if (staticPartitionKVs.size == 0) {
+      fs.delete(insertStagingDir, true)
+    } else {
+      var currentLevelPath = new Path(insertStagingDir, 
getStaticPartitionPath(staticPartitionKVs))
+      fs.delete(currentLevelPath, true)
+
+      var complete = false
+      var remainingLevel = staticPartitionKVs.size - 1
+      while (!complete && remainingLevel > 0) {
+        try {
+          currentLevelPath = new Path(insertStagingDir,
+            getStaticPartitionPath(staticPartitionKVs.slice(0, 
remainingLevel)))
+          if (!fs.delete(currentLevelPath, false)) {
+            complete = true
+          }
+          remainingLevel -= 1
+        } catch {
+          case e: Exception =>
+            logWarning(s"Exception occurred when deleting dir: 
$currentLevelPath.", e)
+            complete = true
+        }
+      }
+
+      try {
+        fs.delete(insertStagingDir, false)
+      } catch {
+        case e: Exception =>
+          logWarning(s"Exception occurred when deleting dir: 
$insertStagingDir.", e)
+      }
+    }
+  }
+}
+
+object  HadoopMapReduceCommitProtocol {
+
+  private def escapePathName(path: String): String = {
 
 Review comment:
   Why do it this way? Is it because we are in the core package?
   
   Do we have better alternatives? How about calling escapePathName earlier, 
e.g. before the value is passed to FileCommitProtocol?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to