Repository: carbondata Updated Branches: refs/heads/master f910cfa98 -> 0992b3b23
[CARBONDATA-2302] Fix some bugs when separating visible and invisible segment info into two files. There are some bugs when separating visible and invisible segment info into two files: 1. The physical data of history segments is not deleted after separating. 2. Duplicate segment ids are generated. This closes #2130 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0992b3b2 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0992b3b2 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0992b3b2 Branch: refs/heads/master Commit: 0992b3b2357235784b0c1d7d59990525fb174670 Parents: f910cfa Author: Zhang Zhichao <[email protected]> Authored: Mon Apr 2 01:09:52 2018 +0800 Committer: Jacky Li <[email protected]> Committed: Mon Apr 2 20:46:18 2018 +0800 ---------------------------------------------------------------------- .../statusmanager/SegmentStatusManager.java | 39 ++++++++++++++++---- .../carbondata/core/util/DeleteLoadFolders.java | 26 ++++++++++++- .../apache/spark/util/CarbonCommandSuite.scala | 11 ++++-- 3 files changed, 62 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0992b3b2/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java index 06cf76f..bbc820c 100755 --- a/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java +++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java @@ -254,12 +254,12 @@ public class SegmentStatusManager { } /** - * This method will create new segment id + * This method will get the max segment id 
* * @param loadMetadataDetails * @return */ - public static int createNewSegmentId(LoadMetadataDetails[] loadMetadataDetails) { + public static int getMaxSegmentId(LoadMetadataDetails[] loadMetadataDetails) { int newSegmentId = -1; for (int i = 0; i < loadMetadataDetails.length; i++) { try { @@ -283,6 +283,17 @@ public class SegmentStatusManager { } } } + return newSegmentId; + } + + /** + * This method will create new segment id + * + * @param loadMetadataDetails + * @return + */ + public static int createNewSegmentId(LoadMetadataDetails[] loadMetadataDetails) { + int newSegmentId = getMaxSegmentId(loadMetadataDetails); newSegmentId++; return newSegmentId; } @@ -903,13 +914,16 @@ public class SegmentStatusManager { int invisibleSegmentPreserveCnt = CarbonProperties.getInstance().getInvisibleSegmentPreserveCount(); - int invisibleSegmentCnt = SegmentStatusManager.countInvisibleSegments(tuple2.details); + int maxSegmentId = SegmentStatusManager.getMaxSegmentId(tuple2.details); + int invisibleSegmentCnt = SegmentStatusManager.countInvisibleSegments( + tuple2.details, maxSegmentId); + LoadMetadataDetails[] newAddedLoadHistoryList = null; // if execute command 'clean files' or the number of invisible segment info // exceeds the value of 'carbon.invisible.segments.preserve.count', // it need to append the invisible segment list to 'tablestatus.history' file. 
if (isForceDeletion || (invisibleSegmentCnt > invisibleSegmentPreserveCnt)) { TableStatusReturnTuple tableStatusReturn = separateVisibleAndInvisibleSegments( - tuple2.details, latestMetadata, invisibleSegmentCnt); + tuple2.details, latestMetadata, invisibleSegmentCnt, maxSegmentId); LoadMetadataDetails[] oldLoadHistoryList = readLoadHistoryMetadata( carbonTable.getMetadataPath()); LoadMetadataDetails[] newLoadHistoryList = appendLoadHistoryList( @@ -920,6 +934,8 @@ public class SegmentStatusManager { writeLoadDetailsIntoFile( CarbonTablePath.getTableStatusHistoryFilePath(carbonTable.getTablePath()), newLoadHistoryList); + // the segments which will be moved to history file need to be deleted + newAddedLoadHistoryList = tableStatusReturn.arrayOfLoadHistoryDetails; } else { // update the metadata details from old to new status. List<LoadMetadataDetails> latestStatus = @@ -927,7 +943,8 @@ public class SegmentStatusManager { writeLoadMetadata(identifier, latestStatus); } DeleteLoadFolders.physicalFactAndMeasureMetadataDeletion( - identifier, carbonTable.getMetadataPath(), isForceDeletion, partitionSpecs); + identifier, carbonTable.getMetadataPath(), + newAddedLoadHistoryList, isForceDeletion, partitionSpecs); } else { String dbName = identifier.getCarbonTableIdentifier().getDatabaseName(); String tableName = identifier.getCarbonTableIdentifier().getTableName(); @@ -953,13 +970,17 @@ public class SegmentStatusManager { /** * Get the number of invisible segment info from segment info list. 
*/ - public static int countInvisibleSegments(LoadMetadataDetails[] segmentList) { + public static int countInvisibleSegments( + LoadMetadataDetails[] segmentList, int maxSegmentId) { int invisibleSegmentCnt = 0; if (segmentList.length != 0) { for (LoadMetadataDetails eachSeg : segmentList) { // can not remove segment 0, there are some info will be used later // for example: updateStatusFileName + // also can not remove the max segment id, + // otherwise will impact the generation of segment id if (!eachSeg.getLoadName().equalsIgnoreCase("0") + && !eachSeg.getLoadName().equalsIgnoreCase(String.valueOf(maxSegmentId)) && eachSeg.getVisibility().equalsIgnoreCase("false")) { invisibleSegmentCnt += 1; } @@ -984,7 +1005,8 @@ public class SegmentStatusManager { public static TableStatusReturnTuple separateVisibleAndInvisibleSegments( LoadMetadataDetails[] oldList, LoadMetadataDetails[] newList, - int invisibleSegmentCnt) { + int invisibleSegmentCnt, + int maxSegmentId) { int newSegmentsLength = newList.length; int visibleSegmentCnt = newSegmentsLength - invisibleSegmentCnt; LoadMetadataDetails[] arrayOfVisibleSegments = new LoadMetadataDetails[visibleSegmentCnt]; @@ -996,7 +1018,8 @@ public class SegmentStatusManager { LoadMetadataDetails newSegment = newList[i]; if (i < oldSegmentsLength) { LoadMetadataDetails oldSegment = oldList[i]; - if (newSegment.getLoadName().equalsIgnoreCase("0")) { + if (newSegment.getLoadName().equalsIgnoreCase("0") + || newSegment.getLoadName().equalsIgnoreCase(String.valueOf(maxSegmentId))) { newSegment.setVisibility(oldSegment.getVisibility()); arrayOfVisibleSegments[visibleIdx] = newSegment; visibleIdx++; http://git-wip-us.apache.org/repos/asf/carbondata/blob/0992b3b2/core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java 
b/core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java index d1cd4fb..25ba553 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java +++ b/core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java @@ -60,10 +60,32 @@ public final class DeleteLoadFolders { } public static void physicalFactAndMeasureMetadataDeletion( - AbsoluteTableIdentifier absoluteTableIdentifier, String metadataPath, boolean isForceDelete, + AbsoluteTableIdentifier absoluteTableIdentifier, + String metadataPath, + LoadMetadataDetails[] newAddedLoadHistoryList, + boolean isForceDelete, List<PartitionSpec> specs) { LoadMetadataDetails[] currentDetails = SegmentStatusManager.readLoadMetadata(metadataPath); - for (LoadMetadataDetails oneLoad : currentDetails) { + physicalFactAndMeasureMetadataDeletion( + absoluteTableIdentifier, + currentDetails, + isForceDelete, + specs); + if (newAddedLoadHistoryList != null && newAddedLoadHistoryList.length > 0) { + physicalFactAndMeasureMetadataDeletion( + absoluteTableIdentifier, + newAddedLoadHistoryList, + isForceDelete, + specs); + } + } + + public static void physicalFactAndMeasureMetadataDeletion( + AbsoluteTableIdentifier absoluteTableIdentifier, + LoadMetadataDetails[] loadDetails, + boolean isForceDelete, + List<PartitionSpec> specs) { + for (LoadMetadataDetails oneLoad : loadDetails) { if (checkIfLoadCanBeDeletedPhysically(oneLoad, isForceDelete)) { try { if (oneLoad.getSegmentFile() != null) { http://git-wip-us.apache.org/repos/asf/carbondata/blob/0992b3b2/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala index 93777e9..230f4d8 100644 --- 
a/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala +++ b/integration/spark2/src/test/scala/org/apache/spark/util/CarbonCommandSuite.scala @@ -176,17 +176,20 @@ class CarbonCommandSuite extends Spark2QueryTest with BeforeAndAfterAll { sql(s"insert into ${tableName} select 'abc1',1") sql(s"insert into ${tableName} select 'abc2',2") sql(s"insert into ${tableName} select 'abc3',3") - assert(sql(s"show segments for table ${tableName}").collect().length == 4) + sql(s"insert into ${tableName} select 'abc4',4") + sql(s"insert into ${tableName} select 'abc5',5") + sql(s"insert into ${tableName} select 'abc6',6") + assert(sql(s"show segments for table ${tableName}").collect().length == 10) var detail = SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) var historyDetail = SegmentStatusManager.readLoadHistoryMetadata(carbonTable.getMetadataPath) - assert(detail.length == 4) + assert(detail.length == 10) assert(historyDetail.length == 0) sql(s"clean files for table ${tableName}") assert(sql(s"show segments for table ${tableName}").collect().length == 2) detail = SegmentStatusManager.readLoadMetadata(carbonTable.getMetadataPath) historyDetail = SegmentStatusManager.readLoadHistoryMetadata(carbonTable.getMetadataPath) - assert(detail.length == 3) - assert(historyDetail.length == 1) + assert(detail.length == 4) + assert(historyDetail.length == 6) dropTable(tableName) }
