akashrn5 commented on a change in pull request #4035:
URL: https://github.com/apache/carbondata/pull/4035#discussion_r534788928



##########
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1039,17 +1039,19 @@ private static void 
writeLoadMetadata(AbsoluteTableIdentifier identifier,
     }
   }
 
-  private static ReturnTuple isUpdateRequired(boolean isForceDeletion, 
CarbonTable carbonTable,
-      AbsoluteTableIdentifier absoluteTableIdentifier, LoadMetadataDetails[] 
details) {
+  private static ReturnTuple isUpdateRequired(boolean cleanStaleInProgress, 
boolean
+      cleanCompactedAndMFD, CarbonTable carbonTable, AbsoluteTableIdentifier
+      absoluteTableIdentifier, LoadMetadataDetails[] details) {
     // Delete marked loads
     boolean isUpdateRequired = DeleteLoadFolders
-        .deleteLoadFoldersFromFileSystem(absoluteTableIdentifier, 
isForceDeletion, details,
-            carbonTable.getMetadataPath());
+        .deleteLoadFoldersFromFileSystem(absoluteTableIdentifier, 
cleanStaleInProgress,
+            cleanCompactedAndMFD, details, carbonTable.getMetadataPath());
     return new ReturnTuple(details, isUpdateRequired);
   }
 
-  public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, 
boolean isForceDeletion,
-      List<PartitionSpec> partitionSpecs) throws IOException {
+  public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable,
+      List<PartitionSpec> partitionSpecs, Boolean cleanStaleInprogress,
+                                                  Boolean 
cleanCompactedAndMFD) throws IOException {

Review comment:
       please correct the format here

##########
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1039,17 +1039,19 @@ private static void 
writeLoadMetadata(AbsoluteTableIdentifier identifier,
     }
   }
 
-  private static ReturnTuple isUpdateRequired(boolean isForceDeletion, 
CarbonTable carbonTable,
-      AbsoluteTableIdentifier absoluteTableIdentifier, LoadMetadataDetails[] 
details) {
+  private static ReturnTuple isUpdateRequired(boolean cleanStaleInProgress, 
boolean
+      cleanCompactedAndMFD, CarbonTable carbonTable, AbsoluteTableIdentifier
+      absoluteTableIdentifier, LoadMetadataDetails[] details) {
     // Delete marked loads
     boolean isUpdateRequired = DeleteLoadFolders
-        .deleteLoadFoldersFromFileSystem(absoluteTableIdentifier, 
isForceDeletion, details,
-            carbonTable.getMetadataPath());
+        .deleteLoadFoldersFromFileSystem(absoluteTableIdentifier, 
cleanStaleInProgress,
+            cleanCompactedAndMFD, details, carbonTable.getMetadataPath());
     return new ReturnTuple(details, isUpdateRequired);
   }
 
-  public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, 
boolean isForceDeletion,
-      List<PartitionSpec> partitionSpecs) throws IOException {
+  public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable,
+      List<PartitionSpec> partitionSpecs, Boolean cleanStaleInprogress,
+                                                  Boolean 
cleanCompactedAndMFD) throws IOException {

Review comment:
       wherever you are calling this methods, please name the Boolean variable

##########
File path: 
core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java
##########
@@ -173,40 +176,68 @@ public boolean accept(CarbonFile file) {
   }
 
   private static boolean checkIfLoadCanBeDeleted(LoadMetadataDetails oneLoad,
-      boolean isForceDelete) {
-    if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus() ||
-        SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_IN_PROGRESS == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == 
oneLoad.getSegmentStatus())
-        && oneLoad.getVisibility().equalsIgnoreCase("true")) {
-      if (isForceDelete) {
-        return true;
-      }
-      long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
-      return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && 
CarbonUpdateUtil
-          .isMaxQueryTimeoutExceeded(deletionTime);
+      boolean cleanStaleInProgress, boolean cleanCompactedAndMFD) {
+    if (oneLoad.getVisibility().equalsIgnoreCase("true")) {

Review comment:
       here checking visibility to true, but, when i do delete segment by id or 
date, that time we will set the visibility to false, how its taken care that 
time? is it already handed?

##########
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1095,7 +1099,8 @@ public static void 
deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, boolean
             // if execute command 'clean files' or the number of invisible 
segment info
             // exceeds the value of 'carbon.invisible.segments.preserve.count',
             // it need to append the invisible segment list to 
'tablestatus.history' file.
-            if (isForceDeletion || (invisibleSegmentCnt > 
invisibleSegmentPreserveCnt)) {
+            if (cleanStaleInprogress || cleanCompactedAndMFD || 
(invisibleSegmentCnt >

Review comment:
       `cleanStaleInprogress || cleanCompactedAndMFD` is this condition really 
required here? as functionality is just moving to history file

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/events/CleanFilesEvents.scala
##########
@@ -32,7 +32,10 @@ case class CleanFilesPreEvent(carbonTable: CarbonTable, 
sparkSession: SparkSessi
 /**
  *
  * @param carbonTable
+ * @param cleanStaleInProgress
+ * @param cleanCompactedAndMFD

Review comment:
       ifnot adding any description remove these, already variable name depicts 
whats it for

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCleanFilesCommand.scala
##########
@@ -58,8 +58,10 @@ case class CarbonCleanFilesCommand(
   var carbonTable: CarbonTable = _
   var cleanFileCommands: List[CarbonCleanFilesCommand] = List.empty
   val optionsMap = options.getOrElse(List.empty[(String, String)]).toMap
-  // forceClean will empty trash
+  // forceClean will clean the MFD and Compacted segments immediately
   val forceClean = optionsMap.getOrElse("force", "false").toBoolean
+  // stale_inprogress willclean the In Progress segments immediately

Review comment:
       comment is wrong right? because when `stale_inprogress  = true`, you 
will not clean immediately, you will wait for retention time, when both are 
true, only then you will delete immediately, please update it accordingly

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/spark/util/CommonUtil.scala
##########
@@ -571,7 +571,7 @@ object CommonUtil {
                 try {
                   val carbonTable = CarbonMetadata.getInstance
                     .getCarbonTable(tableUniqueName)
-                  
SegmentStatusManager.deleteLoadsAndUpdateMetadata(carbonTable, true, null)
+                  
SegmentStatusManager.deleteLoadsAndUpdateMetadata(carbonTable, null, true, 
false)

Review comment:
       please name boolean variables, as the first comment

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCleanFilesCommand.scala
##########
@@ -117,9 +119,9 @@ case class CarbonCleanFilesCommand(
         CleanFilesUtil.cleanStaleSegments(carbonTable)
       }
       if (forceTableClean) {

Review comment:
       can we remove this `forceTableClean`? i dont see anyone using variable 
or passing this as true, and its always false

##########
File path: 
core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1039,17 +1039,19 @@ private static void 
writeLoadMetadata(AbsoluteTableIdentifier identifier,
     }
   }
 
-  private static ReturnTuple isUpdateRequired(boolean isForceDeletion, 
CarbonTable carbonTable,

Review comment:
       there is method `writeLoadMetadata ` which take identifier as input, its 
unused method please check and remove that code

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
##########
@@ -125,7 +125,7 @@ case class CarbonLoadDataCommand(databaseNameOp: 
Option[String],
           updateModel = None,
           operationContext = operationContext)
       // Clean up the old invalid segment data before creating a new entry for 
new load.
-      SegmentStatusManager.deleteLoadsAndUpdateMetadata(table, false, 
currPartitions)
+      SegmentStatusManager.deleteLoadsAndUpdateMetadata(table, currPartitions, 
false, false)

Review comment:
       same as above

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/secondaryindex/rdd/SecondaryIndexCreator.scala
##########
@@ -462,7 +462,7 @@ object SecondaryIndexCreator {
       try {
         if (!isCompactionCall) {
           SegmentStatusManager
-            .deleteLoadsAndUpdateMetadata(indexCarbonTable, false, null)
+            .deleteLoadsAndUpdateMetadata(indexCarbonTable, null, false, false)

Review comment:
       name boolean

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/util/CleanFiles.scala
##########
@@ -39,7 +39,8 @@ object CleanFiles {
    *                        drop table from hive metastore so should be very 
careful to use it.
    */
   def cleanFiles(spark: SparkSession, dbName: String, tableName: String,
-     forceTableClean: Boolean = false): Unit = {
+     forceTableClean: Boolean = false, cleanCompactedAndMFD: Boolean = false,
+                 cleanStaleInProgress: Boolean = false ): Unit = {

Review comment:
       please reformat the code

##########
File path: 
integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala
##########
@@ -346,4 +346,10 @@ class TestCleanFileCommand extends QueryTest with 
BeforeAndAfterAll {
     sql(s"""INSERT INTO CLEANTEST SELECT "abc", 1, "name"""")
   }
 
+  def removeSegmentEntryFromTableStatusFile(carbonTable: CarbonTable, 
segmentNo: String) : Unit = {

Review comment:
       i see that this method is used in multiple places, please move to 
utility method and use everywhere

##########
File path: 
integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala
##########
@@ -212,14 +211,16 @@ class TestCleanFileCommand extends QueryTest with 
BeforeAndAfterAll {
 
   test("test trash folder with 2 segments with same segment number") {
     createTable()
-    sql(s"""INSERT INTO CLEANTEST SELECT "1", 2, "name"""")
+    loadData()
 
     val path = CarbonEnv.getCarbonTable(Some("default"), 
"cleantest")(sqlContext.sparkSession)
       .getTablePath
     val trashFolderPath = CarbonTablePath.getTrashFolderPath(path)
     assert(!FileFactory.isFileExist(trashFolderPath))
     // All 4  segments are made as stale segments, they should be moved to the 
trash folder
-    deleteTableStatusFile(path)
+    // deleteTableStatusFile(path)

Review comment:
       please check all places

##########
File path: 
core/src/main/java/org/apache/carbondata/core/util/CleanFilesUtil.java
##########
@@ -147,17 +147,28 @@ public static void 
cleanStaleSegmentsForPartitionTable(CarbonTable carbonTable)
    * stale segment. Only comparing from tablestatus file, not checking 
tablestatus.history file
    */
   private static void getStaleSegmentFiles(CarbonTable carbonTable, 
List<String> staleSegmentFiles,
-      List<String> redundantSegmentFile) {
+      List<String> redundantSegmentFile) throws IOException {
     String segmentFilesLocation =
         CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath());
     List<String> segmentFiles = 
Arrays.stream(FileFactory.getCarbonFile(segmentFilesLocation)
         .listFiles()).map(CarbonFile::getName).collect(Collectors.toList());
     // there are no segments present in the Metadata folder. Can return here
-    if (segmentFiles.size() == 0) {
-      return;
+    // if table status file does not exist return
+    try {
+      if (segmentFiles.size() == 0 || !FileFactory.isFileExist(CarbonTablePath

Review comment:
       agree with @QiangCai , in the below line already it will take care

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonInsertIntoWithDf.scala
##########
@@ -108,7 +108,7 @@ case class CarbonInsertIntoWithDf(databaseNameOp: 
Option[String],
           operationContext = operationContext)
 
       // Clean up the old invalid segment data before creating a new entry for 
new load.
-      SegmentStatusManager.deleteLoadsAndUpdateMetadata(table, false, 
currPartitions)
+      SegmentStatusManager.deleteLoadsAndUpdateMetadata(table, currPartitions, 
false, false)

Review comment:
       name boolean

##########
File path: 
core/src/main/java/org/apache/carbondata/core/util/CleanFilesUtil.java
##########
@@ -147,17 +147,28 @@ public static void 
cleanStaleSegmentsForPartitionTable(CarbonTable carbonTable)
    * stale segment. Only comparing from tablestatus file, not checking 
tablestatus.history file
    */
   private static void getStaleSegmentFiles(CarbonTable carbonTable, 
List<String> staleSegmentFiles,
-      List<String> redundantSegmentFile) {
+      List<String> redundantSegmentFile) throws IOException {
     String segmentFilesLocation =
         CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath());
     List<String> segmentFiles = 
Arrays.stream(FileFactory.getCarbonFile(segmentFilesLocation)
         .listFiles()).map(CarbonFile::getName).collect(Collectors.toList());
     // there are no segments present in the Metadata folder. Can return here
-    if (segmentFiles.size() == 0) {
-      return;
+    // if table status file does not exist return
+    try {
+      if (segmentFiles.size() == 0 || !FileFactory.isFileExist(CarbonTablePath
+          .getTableStatusFilePath(carbonTable.getTablePath()))) {
+        return;
+      }
+    } catch (IOException e) {
+      LOGGER.error("Unable to Check if tablestatus file exists while getting 
stale segments", e);
+      throw e;
     }
     LoadMetadataDetails[] details = 
SegmentStatusManager.readLoadMetadata(carbonTable
         .getMetadataPath());
+    // return if table status file is empty

Review comment:
       change the comment, there wont be any scenario where the table status 
file is empty

##########
File path: 
integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala
##########
@@ -90,25 +100,21 @@ class TestCleanFileCommand extends QueryTest with 
BeforeAndAfterAll {
     val trashFolderPath = CarbonTablePath.getTrashFolderPath(path)
     assert(!FileFactory.isFileExist(trashFolderPath))
     // All 4 segments are made as stale segments and should be moved to trash
-    deleteTableStatusFile(path)
+    // deleteTableStatusFile(path)

Review comment:
       if not required remove , instead of commenting

##########
File path: 
integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/cleanfiles/TestCleanFileCommand.scala
##########
@@ -151,34 +157,31 @@ class TestCleanFileCommand extends QueryTest with 
BeforeAndAfterAll {
 
     val mainTablePath = CarbonEnv.getCarbonTable(Some("default"), 
"cleantest")(sqlContext
       .sparkSession).getTablePath
-    deleteTableStatusFile(mainTablePath)
+    // deleteTableStatusFile(mainTablePath)

Review comment:
       same as above




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to