vikramahuja1001 commented on a change in pull request #4072:
URL: https://github.com/apache/carbondata/pull/4072#discussion_r580001530



##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala
##########
@@ -87,13 +104,51 @@ object DataTrashManager {
     }
   }
 
-  private def checkAndCleanTrashFolder(carbonTable: CarbonTable, 
isForceDelete: Boolean): Unit = {
+  /**
+   * Checks the size of the segment files as well as datafiles. This method is 
used before and after
+   * the clean files operation to check how much space is actually freed during 
the operation.
+   */
+  def getSizeScreenshot(carbonTable: CarbonTable): Long = {
+    val metadataDetails = 
SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath)
+    var size: Long = 0
+    val segmentFileLocation = 
CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath)
+    if (FileFactory.isFileExist(segmentFileLocation)) {
+      size += FileFactory.getDirectorySize(segmentFileLocation)
+    }
+    metadataDetails.foreach(oneLoad =>
+      if (oneLoad.getVisibility.toBoolean) {
+        size += calculateSegmentSizeForOneLoad(carbonTable, oneLoad, 
metadataDetails)
+      }
+    )
+    size
+  }
+
+  /**
+   * Method to handle the Clean files dry run operation
+   */
+  def cleanFilesDryRunOperation (
+      carbonTable: CarbonTable,
+      isForceDelete: Boolean,
+      cleanStaleInProgress: Boolean,
+      showStats: Boolean): Seq[Long] = {
+    // get size freed from the trash folder
+    val trashFolderSizeStats = checkAndCleanTrashFolder(carbonTable, 
isForceDelete,
+        isDryRun = true, showStats)
+    // get size that will be deleted (MFD, Compacted, Inprogress segments)
+    val expiredSegmentsSizeStats = dryRunOnExpiredSegments(carbonTable, 
isForceDelete,
+      cleanStaleInProgress)
+    Seq(trashFolderSizeStats.head + expiredSegmentsSizeStats.head, 
trashFolderSizeStats(1) +
+        expiredSegmentsSizeStats(1))
+  }
+
+  private def checkAndCleanTrashFolder(carbonTable: CarbonTable, 
isForceDelete: Boolean,
+      isDryRun: Boolean, showStats: Boolean): Seq[Long] = {

Review comment:
       done

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala
##########
@@ -121,6 +176,78 @@ object DataTrashManager {
     }
   }
 
+  /**
+   * Does Clean files dry run operation on the expired segments. Returns the 
size freed
+   * during that clean files operation and also shows the remaining trash 
size, which can be
+   * cleaned after those segments are expired
+   */
+  private def dryRunOnExpiredSegments(
+      carbonTable: CarbonTable,
+      isForceDelete: Boolean,
+      cleanStaleInProgress: Boolean): Seq[Long] = {

Review comment:
       done




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to