marchpure commented on a change in pull request #3917:
URL: https://github.com/apache/carbondata/pull/3917#discussion_r488464250



##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/cleanfiles/CleanFilesUtil.scala
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cleanfiles
+
+import java.util.concurrent.{Executors, ScheduledExecutorService, TimeUnit}
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.fs.permission.{FsAction, FsPermission}
+
+import org.apache.carbondata.common.logging.LogServiceFactory
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.indexstore.PartitionSpec
+import org.apache.carbondata.core.locks.{CarbonLockUtil, ICarbonLock, 
LockUsage}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, 
CarbonMetadata, SegmentFileStore}
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
+import org.apache.carbondata.core.mutate.CarbonUpdateUtil
+import org.apache.carbondata.core.statusmanager.{SegmentStatus, 
SegmentStatusManager}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
+import org.apache.carbondata.core.util.path.CarbonTablePath
+
+object CleanFilesUtil {
+  private val LOGGER = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
+
+  /**
+   * The method deletes all data if forceTableCLean <true> and lean garbage 
segment
+   * (MARKED_FOR_DELETE state) if forceTableCLean <false>
+   *
+   * @param dbName                 : Database name
+   * @param tableName              : Table name
+   * @param tablePath              : Table path
+   * @param carbonTable            : CarbonTable Object <null> in case of 
force clean
+   * @param forceTableClean        : <true> for force clean it will delete all 
data
+   *                               <false> it will clean garbage segment 
(MARKED_FOR_DELETE state)
+   * @param currentTablePartitions : Hive Partitions  details
+   */
+  def cleanFiles(
+                  dbName: String,
+                  tableName: String,
+                  tablePath: String,
+                  carbonTable: CarbonTable,
+                  forceTableClean: Boolean,
+                  currentTablePartitions: Option[Seq[PartitionSpec]] = None,
+                  truncateTable: Boolean = false): Unit = {
+    var carbonCleanFilesLock: ICarbonLock = null
+    val absoluteTableIdentifier = if (forceTableClean) {
+      AbsoluteTableIdentifier.from(tablePath, dbName, tableName, tableName)
+    } else {
+      carbonTable.getAbsoluteTableIdentifier
+    }
+    try {
+      val errorMsg = "Clean files request is failed for " +
+        s"$dbName.$tableName" +
+        ". Not able to acquire the clean files lock due to another clean files 
" +
+        "operation is running in the background."
+      // in case of force clean the lock is not required
+      if (forceTableClean) {

Review comment:
       forceTableClean is too violence, please delete it.

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/cleanfiles/CleanFilesUtil.scala
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cleanfiles
+
+import java.util.concurrent.{Executors, ScheduledExecutorService, TimeUnit}
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.fs.permission.{FsAction, FsPermission}
+
+import org.apache.carbondata.common.logging.LogServiceFactory
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.indexstore.PartitionSpec
+import org.apache.carbondata.core.locks.{CarbonLockUtil, ICarbonLock, 
LockUsage}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, 
CarbonMetadata, SegmentFileStore}
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
+import org.apache.carbondata.core.mutate.CarbonUpdateUtil
+import org.apache.carbondata.core.statusmanager.{SegmentStatus, 
SegmentStatusManager}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
+import org.apache.carbondata.core.util.path.CarbonTablePath
+
+object CleanFilesUtil {
+  private val LOGGER = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
+
+  /**
+   * The method deletes all data if forceTableCLean <true> and lean garbage 
segment
+   * (MARKED_FOR_DELETE state) if forceTableCLean <false>
+   *
+   * @param dbName                 : Database name
+   * @param tableName              : Table name
+   * @param tablePath              : Table path
+   * @param carbonTable            : CarbonTable Object <null> in case of 
force clean
+   * @param forceTableClean        : <true> for force clean it will delete all 
data
+   *                               <false> it will clean garbage segment 
(MARKED_FOR_DELETE state)
+   * @param currentTablePartitions : Hive Partitions  details
+   */
+  def cleanFiles(
+                  dbName: String,
+                  tableName: String,
+                  tablePath: String,
+                  carbonTable: CarbonTable,
+                  forceTableClean: Boolean,
+                  currentTablePartitions: Option[Seq[PartitionSpec]] = None,
+                  truncateTable: Boolean = false): Unit = {
+    var carbonCleanFilesLock: ICarbonLock = null
+    val absoluteTableIdentifier = if (forceTableClean) {
+      AbsoluteTableIdentifier.from(tablePath, dbName, tableName, tableName)
+    } else {
+      carbonTable.getAbsoluteTableIdentifier
+    }
+    try {
+      val errorMsg = "Clean files request is failed for " +
+        s"$dbName.$tableName" +
+        ". Not able to acquire the clean files lock due to another clean files 
" +
+        "operation is running in the background."
+      // in case of force clean the lock is not required
+      if (forceTableClean) {
+        FileFactory.deleteAllCarbonFilesOfDir(
+          FileFactory.getCarbonFile(absoluteTableIdentifier.getTablePath))
+      } else {
+        carbonCleanFilesLock =
+          CarbonLockUtil
+            .getLockObject(absoluteTableIdentifier, 
LockUsage.CLEAN_FILES_LOCK, errorMsg)
+        if (truncateTable) {
+          SegmentStatusManager.truncateTable(carbonTable)
+        }
+        SegmentStatusManager.deleteLoadsAndUpdateMetadata(
+          carbonTable, true, currentTablePartitions.map(_.asJava).orNull)
+        CarbonUpdateUtil.cleanUpDeltaFiles(carbonTable, true)
+        currentTablePartitions match {
+          case Some(partitions) =>
+            SegmentFileStore.cleanSegments(
+              carbonTable,
+              currentTablePartitions.map(_.asJava).orNull,
+              true)
+          case _ =>
+        }
+      }
+    } finally {
+      if (currentTablePartitions.equals(None)) {
+        cleanUpPartitionFoldersRecursively(carbonTable, 
List.empty[PartitionSpec])
+      } else {
+        cleanUpPartitionFoldersRecursively(carbonTable, 
currentTablePartitions.get.toList)
+      }
+
+      if (carbonCleanFilesLock != null) {

Review comment:
       If cleanUpPartitionFoldersRecursively throw some exception. The clean 
file lock won't be released.

##########
File path: 
core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java
##########
@@ -192,11 +204,33 @@ private static boolean 
checkIfLoadCanBeDeleted(LoadMetadataDetails oneLoad,
   }
 
   private static boolean checkIfLoadCanBeDeletedPhysically(LoadMetadataDetails 
oneLoad,
-      boolean isForceDelete) {
+      boolean isForceDelete, AbsoluteTableIdentifier absoluteTableIdentifier) {
     // Check if the segment is added externally and path is set then do not 
delete it
     if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus()
-        || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus()) && 
(oneLoad.getPath() == null
+        || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() || 
SegmentStatus
+            .INSERT_IN_PROGRESS == oneLoad.getSegmentStatus()) && 
(oneLoad.getPath() == null

Review comment:
       For INSERT_IN_PROGRESS segment
   depend on timeout threshold, not lock
   in your logic, once 'INSERT_IN_PROGRESS' more than 1 hour, the segment will 
be deleted。

##########
File path: 
integration/spark/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonCleanFilesCommand.scala
##########
@@ -108,12 +112,51 @@ case class CarbonCleanFilesCommand(
     Seq.empty
   }
 
+  def deleteStashInMetadataFolder(carbonTable: CarbonTable): Unit = {
+    val tableStatusLock = CarbonLockFactory
+      .getCarbonLockObj(carbonTable.getAbsoluteTableIdentifier, 
LockUsage.TABLE_STATUS_LOCK)
+    val carbonLoadModel = new CarbonLoadModel
+    try {
+      if (tableStatusLock.lockWithRetries()) {
+        val tableStatusFilePath = CarbonTablePath
+          .getTableStatusFilePath(carbonTable.getTablePath)
+        val loadMetaDataDetails = SegmentStatusManager
+          .readTableStatusFile(tableStatusFilePath).filter(details => 
details.getSegmentStatus ==
+          SegmentStatus.SUCCESS || details.getSegmentStatus == 
SegmentStatus.LOAD_PARTIAL_SUCCESS)
+          .sortWith(_.getLoadName < _.getLoadName)
+        
carbonLoadModel.setLoadMetadataDetails(loadMetaDataDetails.toList.asJava)
+      } else {
+        throw new ConcurrentOperationException(carbonTable.getDatabaseName,
+          carbonTable.getTableName, "table status read", "clean files command")
+      }
+    } finally {
+      tableStatusLock.unlock()
+    }
+    val loadMetaDataDetails = carbonLoadModel.getLoadMetadataDetails.asScala
+    val segmentFileList = loadMetaDataDetails.map(f => 
CarbonTablePath.getSegmentFilesLocation(

Review comment:
       Add code:
   if (loadMetaDataDetails.isEmpty) return;

##########
File path: 
integration/spark/src/main/scala/org/apache/carbondata/cleanfiles/CleanFilesUtil.scala
##########
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.carbondata.cleanfiles
+
+import java.util.concurrent.{Executors, ScheduledExecutorService, TimeUnit}
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.fs.permission.{FsAction, FsPermission}
+
+import org.apache.carbondata.common.logging.LogServiceFactory
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datastore.filesystem.CarbonFile
+import org.apache.carbondata.core.datastore.impl.FileFactory
+import org.apache.carbondata.core.indexstore.PartitionSpec
+import org.apache.carbondata.core.locks.{CarbonLockUtil, ICarbonLock, 
LockUsage}
+import org.apache.carbondata.core.metadata.{AbsoluteTableIdentifier, 
CarbonMetadata, SegmentFileStore}
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
+import org.apache.carbondata.core.mutate.CarbonUpdateUtil
+import org.apache.carbondata.core.statusmanager.{SegmentStatus, 
SegmentStatusManager}
+import org.apache.carbondata.core.util.{CarbonProperties, CarbonUtil}
+import org.apache.carbondata.core.util.path.CarbonTablePath
+
+object CleanFilesUtil {
+  private val LOGGER = 
LogServiceFactory.getLogService(this.getClass.getCanonicalName)
+
+  /**
+   * The method deletes all data if forceTableCLean <true> and lean garbage 
segment
+   * (MARKED_FOR_DELETE state) if forceTableCLean <false>
+   *
+   * @param dbName                 : Database name
+   * @param tableName              : Table name
+   * @param tablePath              : Table path
+   * @param carbonTable            : CarbonTable Object <null> in case of 
force clean
+   * @param forceTableClean        : <true> for force clean it will delete all 
data
+   *                               <false> it will clean garbage segment 
(MARKED_FOR_DELETE state)
+   * @param currentTablePartitions : Hive Partitions  details
+   */
+  def cleanFiles(
+                  dbName: String,
+                  tableName: String,
+                  tablePath: String,
+                  carbonTable: CarbonTable,
+                  forceTableClean: Boolean,
+                  currentTablePartitions: Option[Seq[PartitionSpec]] = None,
+                  truncateTable: Boolean = false): Unit = {
+    var carbonCleanFilesLock: ICarbonLock = null
+    val absoluteTableIdentifier = if (forceTableClean) {
+      AbsoluteTableIdentifier.from(tablePath, dbName, tableName, tableName)
+    } else {
+      carbonTable.getAbsoluteTableIdentifier
+    }
+    try {
+      val errorMsg = "Clean files request is failed for " +
+        s"$dbName.$tableName" +
+        ". Not able to acquire the clean files lock due to another clean files 
" +
+        "operation is running in the background."
+      // in case of force clean the lock is not required
+      if (forceTableClean) {
+        FileFactory.deleteAllCarbonFilesOfDir(
+          FileFactory.getCarbonFile(absoluteTableIdentifier.getTablePath))
+      } else {
+        carbonCleanFilesLock =
+          CarbonLockUtil
+            .getLockObject(absoluteTableIdentifier, 
LockUsage.CLEAN_FILES_LOCK, errorMsg)
+        if (truncateTable) {
+          SegmentStatusManager.truncateTable(carbonTable)

Review comment:
       truncateTable is too violence, please delete it.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to