[CARBONDATA-2805] Fix the ordering mismatch of segment numbers during custom compaction
Problem: When we have segments 0 to 6 and specify segments 1, 2, 3 for custom compaction, the compacted segment should be created as 1.1, but sometimes it is created as 3.1, which is wrong. This is because the custom segment IDs were passed in a HashSet and finally inserted into a HashMap while identifying the segments to be merged. HashMap and HashSet do not guarantee insertion order, which may lead to a mismatch of segment numbers. Solution: Use LinkedHashSet and LinkedHashMap, which always preserve insertion order. This closes #2585 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/c29aef88 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/c29aef88 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/c29aef88 Branch: refs/heads/external-format Commit: c29aef880a57d1f1297361a5296e77af3904d661 Parents: cfbf7b6 Author: akashrn5 <akashnilu...@gmail.com> Authored: Mon Jul 30 19:22:29 2018 +0530 Committer: manishgupta88 <tomanishgupt...@gmail.com> Committed: Wed Aug 1 13:38:08 2018 +0530 ---------------------------------------------------------------------- .../processing/merger/CarbonDataMergerUtil.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/c29aef88/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java ---------------------------------------------------------------------- diff --git a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java index 78af751..1162fc2 100644 --- a/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java +++ 
b/processing/src/main/java/org/apache/carbondata/processing/merger/CarbonDataMergerUtil.java @@ -391,7 +391,6 @@ public final class CarbonDataMergerUtil { CarbonLoadModel carbonLoadModel, long compactionSize, List<LoadMetadataDetails> segments, CompactionType compactionType, List<String> customSegmentIds) throws IOException, MalformedCarbonCommandException { - String tablePath = carbonLoadModel.getTablePath(); Map<String, String> tableLevelProperties = carbonLoadModel.getCarbonDataLoadSchema() .getCarbonTable().getTableInfo().getFactTable().getTableProperties(); List<LoadMetadataDetails> sortedSegments = new ArrayList<LoadMetadataDetails>(segments); @@ -400,7 +399,7 @@ public final class CarbonDataMergerUtil { if (CompactionType.CUSTOM == compactionType) { return identitySegmentsToBeMergedBasedOnSpecifiedSegments(sortedSegments, - new HashSet<>(customSegmentIds)); + new LinkedHashSet<>(customSegmentIds)); } // Check for segments which are qualified for IUD compaction. @@ -424,7 +423,7 @@ public final class CarbonDataMergerUtil { if (CompactionType.MAJOR == compactionType) { listOfSegmentsToBeMerged = identifySegmentsToBeMergedBasedOnSize(compactionSize, - listOfSegmentsLoadedInSameDateInterval, carbonLoadModel, tablePath); + listOfSegmentsLoadedInSameDateInterval, carbonLoadModel); } else { listOfSegmentsToBeMerged = @@ -462,7 +461,7 @@ public final class CarbonDataMergerUtil { List<LoadMetadataDetails> listOfSegments, Set<String> segmentIds) throws MalformedCarbonCommandException { Map<String, LoadMetadataDetails> specifiedSegments = - new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); + new LinkedHashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); for (LoadMetadataDetails detail : listOfSegments) { if (segmentIds.contains(detail.getLoadName())) { specifiedSegments.put(detail.getLoadName(), detail); @@ -623,13 +622,12 @@ public final class CarbonDataMergerUtil { * @param listOfSegmentsAfterPreserve the segments list after * preserving the 
configured number of latest loads * @param carbonLoadModel carbon load model - * @param tablePath the store location of the segment * @return the list of segments that need to be merged * based on the Size in case of Major compaction */ private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize( long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, - CarbonLoadModel carbonLoadModel, String tablePath) throws IOException { + CarbonLoadModel carbonLoadModel) throws IOException { List<LoadMetadataDetails> segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);