QiangCai commented on a change in pull request #3917: URL: https://github.com/apache/carbondata/pull/3917#discussion_r509855538
########## File path: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ########## @@ -1427,6 +1427,25 @@ private CarbonCommonConstants() { public static final String BITSET_PIPE_LINE_DEFAULT = "true"; + public static final String MICROSECONDS_IN_A_DAY = "86400000"; + + /** + * this is the user defined time(in days), when a specific timestamp subdirectory in + * trash folder will expire + */ + @CarbonProperty + public static final String TRASH_EXPIRATION_TIME = "carbon.trash.expiration.time"; Review comment: how about carbon.trash.expiration.days ########## File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java ########## @@ -1105,28 +1109,79 @@ public static void cleanSegments(CarbonTable table, List<PartitionSpec> partitio * @throws IOException */ public static void deleteSegment(String tablePath, Segment segment, - List<PartitionSpec> partitionSpecs, - SegmentUpdateStatusManager updateStatusManager) throws Exception { + List<PartitionSpec> partitionSpecs, SegmentUpdateStatusManager updateStatusManager, + SegmentStatus segmentStatus, Boolean isPartitionTable, String timeStamp) + throws Exception { SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName()); List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true, FileFactory.getConfiguration()); + List<String> filesToDelete = new ArrayList<>(); Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap(); for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) { - FileFactory.deleteFile(entry.getKey()); + // Move the file to the trash folder in case the segment status is insert in progress + if (segmentStatus == SegmentStatus.INSERT_IN_PROGRESS) { + if (!isPartitionTable) { + TrashUtil.moveDataToTrashFolderByFile(tablePath, entry.getKey(), timeStamp + Review comment: how about change method name to TrashUtil.copyFileToTrash? 
########## File path: core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java ########## @@ -138,8 +143,19 @@ public boolean accept(CarbonFile file) { if (filesToBeDeleted.length == 0) { status = true; } else { - for (CarbonFile eachFile : filesToBeDeleted) { + // If the file to be deleted is a carbondata file, index file, index merge file + // or a delta file, copy that file to the trash folder. + if ((eachFile.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT) || Review comment: It would be better to check at the LoadMetadataDetails level, not at the file level. ########## File path: core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java ########## @@ -192,11 +208,17 @@ private static boolean checkIfLoadCanBeDeleted(LoadMetadataDetails oneLoad, } private static boolean checkIfLoadCanBeDeletedPhysically(LoadMetadataDetails oneLoad, - boolean isForceDelete) { + boolean isForceDelete, AbsoluteTableIdentifier absoluteTableIdentifier) { // Check if the segment is added externally and path is set then do not delete it if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus() - || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus()) && (oneLoad.getPath() == null + || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() || SegmentStatus + .INSERT_IN_PROGRESS == oneLoad.getSegmentStatus()) && (oneLoad.getPath() == null Review comment: It would also be better to keep an insert_in_progress segment in its original place for a period (for example, 3 days?). After the expiration days have passed, move it to the trash or delete it directly.
########## File path: core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ########## @@ -1427,6 +1427,25 @@ private CarbonCommonConstants() { public static final String BITSET_PIPE_LINE_DEFAULT = "true"; + public static final String MICROSECONDS_IN_A_DAY = "86400000"; Review comment: public static final long MILLIS_SECONDS_IN_A_DAY = TimeUnit.DAYS.toMillis(1); ########## File path: processing/src/main/java/org/apache/carbondata/processing/loading/TableProcessingOperations.java ########## @@ -53,12 +52,14 @@ private static final Logger LOGGER = LogServiceFactory.getLogService(CarbonLoaderUtil.class.getName()); + private static List<CarbonFile> filesInTrashFolder = new ArrayList<CarbonFile>(); + /** * delete folder which metadata no exist in tablestatus * this method don't check tablestatus history. */ public static void deletePartialLoadDataIfExist(CarbonTable carbonTable, Review comment: This method should be moved to clean files. Another PR is also changing it:
https://github.com/apache/carbondata/pull/3935 @Pickupolddriver ########## File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java ########## @@ -1105,28 +1109,79 @@ public static void cleanSegments(CarbonTable table, List<PartitionSpec> partitio * @throws IOException */ public static void deleteSegment(String tablePath, Segment segment, - List<PartitionSpec> partitionSpecs, - SegmentUpdateStatusManager updateStatusManager) throws Exception { + List<PartitionSpec> partitionSpecs, SegmentUpdateStatusManager updateStatusManager, + SegmentStatus segmentStatus, Boolean isPartitionTable, String timeStamp) + throws Exception { SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName()); List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true, FileFactory.getConfiguration()); + List<String> filesToDelete = new ArrayList<>(); Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap(); for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) { - FileFactory.deleteFile(entry.getKey()); + // Move the file to the trash folder in case the segment status is insert in progress + if (segmentStatus == SegmentStatus.INSERT_IN_PROGRESS) { + if (!isPartitionTable) { + TrashUtil.moveDataToTrashFolderByFile(tablePath, entry.getKey(), timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo()); + } else { + TrashUtil.moveDataToTrashFolderByFile(tablePath, entry.getKey(), timeStamp + Review comment: How about doing it like this: for a normal table: timestamp/Fact/Part0/Segment_#; for a partition table: timestamp/partition_folder ########## File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java ########## @@ -1105,28 +1109,79 @@ public static void cleanSegments(CarbonTable table, List<PartitionSpec> partitio * @throws IOException */ public static void deleteSegment(String tablePath, Segment
segment, - List<PartitionSpec> partitionSpecs, - SegmentUpdateStatusManager updateStatusManager) throws Exception { + List<PartitionSpec> partitionSpecs, SegmentUpdateStatusManager updateStatusManager, + SegmentStatus segmentStatus, Boolean isPartitionTable, String timeStamp) + throws Exception { SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName()); List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true, FileFactory.getConfiguration()); + List<String> filesToDelete = new ArrayList<>(); Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap(); for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) { - FileFactory.deleteFile(entry.getKey()); + // Move the file to the trash folder in case the segment status is insert in progress + if (segmentStatus == SegmentStatus.INSERT_IN_PROGRESS) { Review comment: It would be better to add a separate moveSegmentToTrash method instead of changing deleteSegment so much; that way the segment status needs to be checked only once instead of many times.
########## File path: core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java ########## @@ -1105,28 +1109,79 @@ public static void cleanSegments(CarbonTable table, List<PartitionSpec> partitio * @throws IOException */ public static void deleteSegment(String tablePath, Segment segment, - List<PartitionSpec> partitionSpecs, - SegmentUpdateStatusManager updateStatusManager) throws Exception { + List<PartitionSpec> partitionSpecs, SegmentUpdateStatusManager updateStatusManager, + SegmentStatus segmentStatus, Boolean isPartitionTable, String timeStamp) + throws Exception { SegmentFileStore fileStore = new SegmentFileStore(tablePath, segment.getSegmentFileName()); List<String> indexOrMergeFiles = fileStore.readIndexFiles(SegmentStatus.SUCCESS, true, FileFactory.getConfiguration()); + List<String> filesToDelete = new ArrayList<>(); Map<String, List<String>> indexFilesMap = fileStore.getIndexFilesMap(); for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) { - FileFactory.deleteFile(entry.getKey()); + // Move the file to the trash folder in case the segment status is insert in progress + if (segmentStatus == SegmentStatus.INSERT_IN_PROGRESS) { + if (!isPartitionTable) { + TrashUtil.moveDataToTrashFolderByFile(tablePath, entry.getKey(), timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo()); + } else { + TrashUtil.moveDataToTrashFolderByFile(tablePath, entry.getKey(), timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo() + CarbonCommonConstants.FILE_SEPARATOR + entry.getKey().substring( + tablePath.length() + 1).split(CarbonCommonConstants.FILE_SEPARATOR)[0]); + } + } + // add the file to the filesToDelete map to delete it after the complete segment + // has been copied. 
+ filesToDelete.add(entry.getKey()); for (String file : entry.getValue()) { String[] deltaFilePaths = updateStatusManager.getDeleteDeltaFilePath(file, segment.getSegmentNo()); for (String deltaFilePath : deltaFilePaths) { - FileFactory.deleteFile(deltaFilePath); + // Move the file to the trash folder in case the segment status is insert in progress + if (segmentStatus == SegmentStatus.INSERT_IN_PROGRESS) { + if (!isPartitionTable) { + TrashUtil.moveDataToTrashFolderByFile(tablePath, deltaFilePath, timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo()); + } else { + TrashUtil.moveDataToTrashFolderByFile(tablePath, deltaFilePath, timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo() + CarbonCommonConstants.FILE_SEPARATOR + deltaFilePath.substring( + tablePath.length() + 1).split(CarbonCommonConstants.FILE_SEPARATOR)[0]); + } + } + filesToDelete.add(deltaFilePath); + } + // If the file to be deleted is a carbondata file, copy that file to the trash folder. 
+ if (file.endsWith(CarbonCommonConstants.FACT_FILE_EXT) && segmentStatus == + SegmentStatus.INSERT_IN_PROGRESS) { + if (!isPartitionTable) { + TrashUtil.moveDataToTrashFolderByFile(tablePath, file, timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo()); + } else { + TrashUtil.moveDataToTrashFolderByFile(tablePath, file, timeStamp + + CarbonCommonConstants.FILE_SEPARATOR + CarbonCommonConstants.LOAD_FOLDER + segment + .getSegmentNo() + CarbonCommonConstants.FILE_SEPARATOR + file.substring(tablePath + .length() + 1).split(CarbonCommonConstants.FILE_SEPARATOR)[0]); + } } - FileFactory.deleteFile(file); + filesToDelete.add(file); } } - deletePhysicalPartition(partitionSpecs, indexFilesMap, indexOrMergeFiles, tablePath); + LoadMetadataDetails loadMetaDataDetail = new LoadMetadataDetails(); + loadMetaDataDetail.setSegmentStatus(segmentStatus); + loadMetaDataDetail.setLoadName(segment.getSegmentNo()); + deletePhysicalPartition(partitionSpecs, indexFilesMap, indexOrMergeFiles, tablePath, + loadMetaDataDetail, filesToDelete, timeStamp); String segmentFilePath = CarbonTablePath.getSegmentFilePath(tablePath, segment.getSegmentFileName()); // Deletes the physical segment file FileFactory.deleteFile(segmentFilePath); Review comment: do we need to move this segment file to trash? ########## File path: core/src/main/java/org/apache/carbondata/core/util/path/TrashUtil.java ########## @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.core.util.path; + +import java.io.File; +import java.io.IOException; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; + +import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; +import org.apache.carbondata.core.datastore.filesystem.CarbonFile; +import org.apache.carbondata.core.datastore.impl.FileFactory; +import org.apache.carbondata.core.exception.CarbonFileException; +import org.apache.carbondata.core.util.CarbonUtil; + +import org.apache.commons.io.FileUtils; + +import org.apache.log4j.Logger; + +public final class TrashUtil { + + private static final Logger LOGGER = + LogServiceFactory.getLogService(CarbonUtil.class.getName()); + + /** + * The below method copies the complete a file to the trash folder. Provide necessary + * timestamp and the segment number in the suffixToAdd variable, so that the proper folder is + * created in the trash folder. + */ + public static void moveDataToTrashFolderByFile(String carbonTablePath, String pathOfFileToCopy, + String suffixToAdd) { + String trashFolderPath = carbonTablePath + CarbonCommonConstants.FILE_SEPARATOR + Review comment: how about extract the code to CarbonTablePath.getTrashFolder? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: users@infra.apache.org