vikramahuja1001 commented on a change in pull request #4072: URL: https://github.com/apache/carbondata/pull/4072#discussion_r580001530
########## File path: integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala ########## @@ -87,13 +104,51 @@ object DataTrashManager { } } - private def checkAndCleanTrashFolder(carbonTable: CarbonTable, isForceDelete: Boolean): Unit = { + /** + * Checks the size of the segment files as well as datafiles, this method is used before and after + * clean files operation to check how much space is actually freed, during the operation. + */ + def getSizeScreenshot(carbonTable: CarbonTable): Long = { + val metadataDetails = SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) + var size: Long = 0 + val segmentFileLocation = CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath) + if (FileFactory.isFileExist(segmentFileLocation)) { + size += FileFactory.getDirectorySize(segmentFileLocation) + } + metadataDetails.foreach(oneLoad => + if (oneLoad.getVisibility.toBoolean) { + size += calculateSegmentSizeForOneLoad(carbonTable, oneLoad, metadataDetails) + } + ) + size + } + + /** + * Method to handle the Clean files dry run operation + */ + def cleanFilesDryRunOperation ( + carbonTable: CarbonTable, + isForceDelete: Boolean, + cleanStaleInProgress: Boolean, + showStats: Boolean): Seq[Long] = { + // get size freed from the trash folder + val trashFolderSizeStats = checkAndCleanTrashFolder(carbonTable, isForceDelete, + isDryRun = true, showStats) + // get size that will be deleted (MFD, Compacted, Inprogress segments) + val expiredSegmentsSizeStats = dryRunOnExpiredSegments(carbonTable, isForceDelete, + cleanStaleInProgress) + Seq(trashFolderSizeStats.head + expiredSegmentsSizeStats.head, trashFolderSizeStats(1) + + expiredSegmentsSizeStats(1)) + } + + private def checkAndCleanTrashFolder(carbonTable: CarbonTable, isForceDelete: Boolean, + isDryRun: Boolean, showStats: Boolean): Seq[Long] = { Review comment: done ########## File path: integration/spark/src/main/scala/org/apache/carbondata/trash/DataTrashManager.scala 
########## @@ -121,6 +176,78 @@ object DataTrashManager { } } + /** + * Does Clean files dry run operation on the expired segments. Returns the size freed + * during that clean files operation and also shows the remaining trash size, which can be + * cleaned after those segments are expired + */ + private def dryRunOnExpiredSegments( + carbonTable: CarbonTable, + isForceDelete: Boolean, + cleanStaleInProgress: Boolean): Seq[Long] = { Review comment: done ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org