akashrn5 commented on a change in pull request #4072:
URL: https://github.com/apache/carbondata/pull/4072#discussion_r570107627
##########
File path: core/src/main/java/org/apache/carbondata/core/datastore/impl/FileFactory.java
##########
@@ -297,6 +297,10 @@ public static boolean deleteFile(String filePath) throws IOException {
return getCarbonFile(filePath).deleteFile();
}
+ public static boolean deleteFile(CarbonFile carbonFile) throws IOException {
Review comment:
A delete API already exists here; please use it instead of adding a new overload.
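For illustration, a minimal sketch of the suggestion, reusing the existing `CarbonFile.deleteFile()` path instead of introducing an overload (the variable name `indexFile` is hypothetical):

```java
// Fetch the CarbonFile once so the size can be recorded before deletion,
// then delete through the API that already exists on CarbonFile.
CarbonFile indexFile = FileFactory.getCarbonFile(entry.getKey());
sizeFreed += indexFile.getSize();
indexFile.deleteFile();
```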
##########
File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
##########
@@ -1126,25 +1130,36 @@ public static void deleteSegmentFile(String tablePath, Segment segment) throws E
* @param partitionSpecs
* @throws IOException
*/
- public static void deleteSegment(String tablePath, Segment segment,
+ public static long deleteSegment(String tablePath, Segment segment,
List<PartitionSpec> partitionSpecs,
SegmentUpdateStatusManager updateStatusManager) throws Exception {
SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName());
List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true,
FileFactory.getConfiguration());
+ long sizeFreed = 0;
Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap();
for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) {
- FileFactory.deleteFile(entry.getKey());
+ CarbonFile entryCarbonFile = FileFactory.getCarbonFile(entry.getKey());
+ sizeFreed += entryCarbonFile.getSize();
+ FileFactory.deleteFile(entryCarbonFile);
+ LOGGER.info("File deleted after clean files operation: " + entry.getKey());
for (String file : entry.getValue()) {
String[] deltaFilePaths = updateStatusManager.getDeleteDeltaFilePath(file, segment.getSegmentNo());
for (String deltaFilePath : deltaFilePaths) {
- FileFactory.deleteFile(deltaFilePath);
+ CarbonFile deltaCarbonFile = FileFactory.getCarbonFile(deltaFilePath);
+ sizeFreed += deltaCarbonFile.getSize();
+ FileFactory.deleteFile(deltaCarbonFile);
+ LOGGER.info("File deleted after clean files operation: " + deltaFilePath);
}
- FileFactory.deleteFile(file);
+ CarbonFile deleteCarbonFile = FileFactory.getCarbonFile(file);
+ sizeFreed += deleteCarbonFile.getSize();
+ FileFactory.deleteFile(deleteCarbonFile);
Review comment:
There is a lot of duplicated code here; maybe you can refactor it into a method like `getSizeAndDelete()`.
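A minimal sketch of what such a helper could look like, built only from the calls already used in this diff (the name follows the review suggestion; its placement and visibility are assumptions):

```java
// Record the file's size, delete it, log the deletion, and return the bytes freed.
private static long getSizeAndDelete(String filePath) throws IOException {
  CarbonFile file = FileFactory.getCarbonFile(filePath);
  long size = file.getSize();
  file.deleteFile();
  LOGGER.info("File deleted after clean files operation: " + filePath);
  return size;
}
```

Each of the three delete sites above would then collapse to `sizeFreed += getSizeAndDelete(path);`.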
##########
File path: integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFilesCommandPartitionTable.scala
##########
@@ -65,14 +65,19 @@ class TestCleanFilesCommandPartitionTable extends QueryTest with BeforeAndAfterA
loadData()
sql(s"""ALTER TABLE CLEANTEST COMPACT "MINOR" """)
loadData()
+ sql(s"CLEAN FILES FOR TABLE cleantest DRYRUN").show()
+ sql(s"CLEAN FILES FOR TABLE cleantest").show()
Review comment:
`.show` is a wasted call in functional tests; please add proper asserts for all of these.
##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1297,4 +1356,37 @@ public static TableStatusReturnTuple separateVisibleAndInvisibleSegments(
return new HashMap<>(0);
}
}
+
+ public static long partitionTableSegmentSize(CarbonTable carbonTable, LoadMetadataDetails
+ oneLoad, LoadMetadataDetails[] loadMetadataDetails, List<PartitionSpec>
+ partitionSpecs) throws Exception {
+ long size = 0;
+ SegmentFileStore fileStore = new SegmentFileStore(carbonTable.getTablePath(), oneLoad
+ .getSegmentFile());
+ List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true,
+ FileFactory.getConfiguration());
+ Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap();
Review comment:
Can't we calculate the size for all segments together? Here it calls readIndexFiles and the file APIs to compute sizes every time, which will be very slow on object stores such as OBS. I feel it is better to optimize this so the calculation happens in one pass.
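To make the suggestion concrete, here is a hypothetical batched variant (the name `partitionTableSegmentsSize` and its signature are assumptions, not the PR's code) that sums all expired segments in one traversal:

```java
// Hypothetical sketch: aggregate the size of several segments in one pass
// instead of re-reading index files once per call site.
public static long partitionTableSegmentsSize(CarbonTable carbonTable,
    List<LoadMetadataDetails> expiredLoads) throws Exception {
  long totalSize = 0;
  for (LoadMetadataDetails oneLoad : expiredLoads) {
    SegmentFileStore fileStore =
        new SegmentFileStore(carbonTable.getTablePath(), oneLoad.getSegmentFile());
    fileStore.readIndexFiles(SegmentStatus.SUCCESS, true, FileFactory.getConfiguration());
    for (Map.Entry<String, List<String>> entry : fileStore.getIndexFilesMap().entrySet()) {
      totalSize += FileFactory.getCarbonFile(entry.getKey()).getSize();
      for (String dataFile : entry.getValue()) {
        totalSize += FileFactory.getCarbonFile(dataFile).getSize();
      }
    }
  }
  return totalSize;
}
```

This still reads one segment file per load, but it keeps the expensive per-file size lookups in a single place where they could later be cached or parallelized for object stores.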
##########
File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
##########
@@ -1126,25 +1130,36 @@ public static void deleteSegmentFile(String tablePath, Segment segment) throws E
* @param partitionSpecs
* @throws IOException
*/
- public static void deleteSegment(String tablePath, Segment segment,
+ public static long deleteSegment(String tablePath, Segment segment,
List<PartitionSpec> partitionSpecs,
SegmentUpdateStatusManager updateStatusManager) throws Exception {
SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName());
List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true,
FileFactory.getConfiguration());
+ long sizeFreed = 0;
Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap();
for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) {
- FileFactory.deleteFile(entry.getKey());
+ CarbonFile entryCarbonFile = FileFactory.getCarbonFile(entry.getKey());
+ sizeFreed += entryCarbonFile.getSize();
Review comment:
Please give `entryCarbonFile` a meaningful name, such as `indexFile` or `dataFile`.
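For example, a rename-only sketch of the lines above:

```java
// Same logic as the diff; only the variable name changes for readability.
CarbonFile indexFile = FileFactory.getCarbonFile(entry.getKey());
sizeFreed += indexFile.getSize();
FileFactory.deleteFile(indexFile);
```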
##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1072,7 +1097,22 @@ public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, boolean
isUpdateRequired(isForceDeletion, carbonTable, identifier, details, cleanStaleInprogress);
if (!tuple2.isUpdateRequired) {
- return;
+ try {
+ for (LoadMetadataDetails oneLoad : details) {
+ if (isExpiredSegment(oneLoad, carbonTable.getAbsoluteTableIdentifier())) {
+ if (!carbonTable.isHivePartitionTable()) {
+ trashSizeRemaining += FileFactory.getDirectorySize(CarbonTablePath
+ .getSegmentPath(carbonTable.getTablePath(), oneLoad.getLoadName()));
+ } else {
+ trashSizeRemaining += partitionTableSegmentSize(carbonTable, oneLoad,
+ details, partitionSpecs);
+ }
+ }
+ }
+ } catch (Exception e) {
+ LOG.error("Unable to calculate size of garbage data", e);
+ }
+ return new long[]{sizeFreed, trashSizeRemaining};
Review comment:
A dry run is meant to report the size. Should this return an empty size here, or should the operation fail? If it returns an empty size, the dry run has not fulfilled its own purpose.
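If failing is the preferred answer, one possible shape (a sketch only, assuming the enclosing method can propagate the exception):

```java
} catch (Exception e) {
  LOG.error("Unable to calculate size of garbage data", e);
  // Rethrow so the dry run fails visibly instead of reporting an empty size.
  throw e;
}
```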
##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1072,7 +1097,22 @@ public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, boolean
isUpdateRequired(isForceDeletion, carbonTable, identifier, details, cleanStaleInprogress);
if (!tuple2.isUpdateRequired) {
- return;
+ try {
+ for (LoadMetadataDetails oneLoad : details) {
+ if (isExpiredSegment(oneLoad, carbonTable.getAbsoluteTableIdentifier())) {
+ if (!carbonTable.isHivePartitionTable()) {
Review comment:
Why introduce the negation and invite confusion? Just remove the negation and swap the if and else blocks; that reads more simply, right? :)
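Concretely, the suggestion amounts to the same code with the branches swapped:

```java
if (carbonTable.isHivePartitionTable()) {
  // partition table: size must be derived from the segment file
  trashSizeRemaining += partitionTableSegmentSize(carbonTable, oneLoad, details, partitionSpecs);
} else {
  // non-partition table: the segment directory size is enough
  trashSizeRemaining += FileFactory.getDirectorySize(
      CarbonTablePath.getSegmentPath(carbonTable.getTablePath(), oneLoad.getLoadName()));
}
```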
##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1125,13 +1165,32 @@ public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, boolean
CarbonLockUtil.fileUnlock(carbonTableStatusLock, LockUsage.TABLE_STATUS_LOCK);
}
if (updateCompletionStatus) {
- DeleteLoadFolders
+ long[] cleanFileSizeFreed = DeleteLoadFolders
.physicalFactAndMeasureMetadataDeletion(carbonTable, newAddedLoadHistoryList,
isForceDeletion, partitionSpecs, cleanStaleInprogress);
+ sizeFreed += cleanFileSizeFreed[0];
+ trashSizeRemaining += cleanFileSizeFreed[1];
+ }
+ }
+ } else {
+ try {
+ for (LoadMetadataDetails oneLoad : metadataDetails) {
+ if (isExpiredSegment(oneLoad, carbonTable.getAbsoluteTableIdentifier())) {
+ if (!carbonTable.isHivePartitionTable()) {
+ trashSizeRemaining += FileFactory.getDirectorySize(CarbonTablePath.getSegmentPath(
+ carbonTable.getTablePath(), oneLoad.getLoadName()));
+ } else {
+ trashSizeRemaining += partitionTableSegmentSize(carbonTable, oneLoad,
+ metadataDetails, partitionSpecs);
+ }
+ }
}
+ } catch (Exception e) {
+ LOG.error("Unable to calculate size of garbage data", e);
}
}
}
+ return new long[]{sizeFreed, trashSizeRemaining};
Review comment:
Same as above; just my opinion. What do you think, @ajantha-bhat @QiangCai?
##########
File path: integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCleanFilesCommand.scala
##########
@@ -37,11 +38,24 @@ case class CarbonCleanFilesCommand(
databaseNameOp: Option[String],
tableName: String,
options: Map[String, String] = Map.empty,
+ dryRun: Boolean,
isInternalCleanCall: Boolean = false)
extends DataCommand {
val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
+ override def output: Seq[AttributeReference] = {
+ if (dryRun) {
+ Seq(
+ AttributeReference("Size that will be Freed", LongType, nullable = false)(),
Review comment:
Better to give this a simpler, more meaningful name.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]