jackylk commented on a change in pull request #3589: [CARBONDATA-3667] Insert
stage recover processing of the partition ta…
URL: https://github.com/apache/carbondata/pull/3589#discussion_r369389628
##########
File path:
integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertFromStageCommand.scala
##########
@@ -431,8 +318,53 @@ case class CarbonInsertFromStageCommand(
}.map { future =>
future.get()
}
+ }
+
+ /**
+ * Delete stage file and success file with retry
+ */
+ private def deleteStageFilesWithRetry(
+ executorService: ExecutorService,
+ _stageFiles: Array[(CarbonFile, CarbonFile)]): Unit = {
+ val startTime = System.currentTimeMillis()
+ var retryCount = DELETE_STAGEFILES_RETRY_TIMES
+ var stageFiles = _stageFiles
+ while (stageFiles.length > 0 && retryCount > 0) {
+ retryCount -= 1
+ // 1) delete stage files
+ deleteStageFiles(executorService, stageFiles)
+ // 2) ensure stage files are all deleted
+ stageFiles = ensureStageFilesAreAllDeleted(executorService, stageFiles)
+ }
+ LOGGER.info(s"finished to delete stage files, time taken: " +
+ s"${System.currentTimeMillis() - startTime}ms")
+ // if there are still stage files failed to clean, print log.
+ if (stageFiles.length > 0) {
+ LOGGER.warn(s"failed to clean up stage files:" +
stageFiles.map(_._1.getName).mkString(","))
+ }
+ }
+
+
+ /**
+ * ensure stage files are all deleted
+ */
+ private def ensureStageFilesAreAllDeleted(
+ executorService: ExecutorService,
+ stageFiles: Array[(CarbonFile, CarbonFile)]): Array[(CarbonFile,
CarbonFile)] = {
+ val startTime = System.currentTimeMillis()
+ stageFiles.map { files =>
+ executorService.submit(new Callable[Boolean] {
+ override def call(): Boolean = {
+ files._1.exists() || files._2.exists()
+ }
+ })
+ }.filter { future =>
+ future.get()
+ }
LOGGER.info(s"finished to delete stage files, time taken: " +
Review comment:
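This log is incorrect now: ensureStageFilesAreAllDeleted only checks whether the stage files still exist (it does not delete anything), so it should not log "finished to delete stage files" here.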
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services