discivigour commented on code in PR #7826:
URL: https://github.com/apache/paimon/pull/7826#discussion_r3226790272


##########
paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java:
##########
@@ -155,6 +161,276 @@ private static void mergeCandidates(
         }
     }
 
+    public static Optional<List<ManifestFileMeta>> trySortCompaction(
+            List<ManifestFileMeta> input,
+            List<ManifestFileMeta> newFilesForAbort,
+            ManifestFile manifestFile,
+            RowType partitionType,
+            int rewriteManifestCount,
+            @Nullable Integer manifestReadParallelism)
+            throws Exception {
+        checkArgument(
+                rewriteManifestCount > 0,
+                "Manifest sort rewrite manifest count must be greater than 
0.");
+
+        if (partitionType.getFieldCount() == 0 || input.size() <= 1) {
+            return Optional.empty();
+        }
+
+        // Sort compaction may move a rewritten group before non-rewritten 
manifests. Even though
+        // rewriteGroups keeps the input order inside each group, it cannot 
preserve ordering
+        // dependencies between a DELETE entry and an ADD entry in manifests 
outside the group.
+        for (ManifestFileMeta file : input) {
+            if (file.numDeletedFiles() > 0) {
+                return Optional.empty();
+            }
+        }
+
+        RecordComparator partitionComparator =
+                
CodeGenUtils.newRecordComparator(partitionType.getFieldTypes());
+        List<ManifestFileMeta> compactCandidates = compactCandidates(input, 
partitionComparator);
+        if (compactCandidates.size() <= 1) {
+            return Optional.empty();
+        }
+
+        List<ManifestSortedRun> runs = partitionSortedRuns(compactCandidates, 
partitionComparator);
+        if (runs.size() <= 1) {
+            return Optional.empty();
+        }
+
+        List<ManifestSortedRun> pickedRuns = pickRuns(runs, 
rewriteManifestCount);
+        if (pickedRuns.isEmpty()) {
+            return Optional.empty();
+        }
+
+        List<ManifestFileMeta> pickedFiles = new ArrayList<>();
+        for (ManifestSortedRun run : pickedRuns) {
+            pickedFiles.addAll(run.files);
+        }
+        List<List<ManifestFileMeta>> rewriteGroups =
+                rewriteGroups(pickedFiles, input, partitionComparator);
+        LinkedHashSet<String> rewriteFileNames = new LinkedHashSet<>();
+        for (List<ManifestFileMeta> rewriteGroup : rewriteGroups) {
+            addFileNames(rewriteGroup, rewriteFileNames);
+        }
+        if (rewriteFileNames.size() <= 1) {
+            return Optional.empty();
+        }
+
+        LOG.info(
+                "Start Manifest File Sort Compaction: sortedRuns: {}, 
pickedRuns: {}, pickedFiles: {}",
+                runs.size(),
+                pickedRuns.size(),
+                pickedFiles.size());
+
+        int insertPos = -1;
+        for (int i = 0; i < input.size(); i++) {
+            ManifestFileMeta file = input.get(i);
+            if (rewriteFileNames.contains(file.fileName())) {
+                insertPos = i;
+                break;
+            }
+        }
+
+        List<ManifestFileMeta> rewritten = new ArrayList<>();
+        for (List<ManifestFileMeta> rewriteGroup : rewriteGroups) {
+            Map<FileEntry.Identifier, ManifestEntry> map = new 
LinkedHashMap<>();

Review Comment:
   If a 1KB manifest and many 8MB manifests are continuously picked together,
I think the write amplification of the sort rewrite is a bit large.



##########
paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java:
##########
@@ -155,6 +161,276 @@ private static void mergeCandidates(
         }
     }
 
+    public static Optional<List<ManifestFileMeta>> trySortCompaction(
+            List<ManifestFileMeta> input,
+            List<ManifestFileMeta> newFilesForAbort,
+            ManifestFile manifestFile,
+            RowType partitionType,
+            int rewriteManifestCount,
+            @Nullable Integer manifestReadParallelism)
+            throws Exception {
+        checkArgument(
+                rewriteManifestCount > 0,
+                "Manifest sort rewrite manifest count must be greater than 
0.");
+
+        if (partitionType.getFieldCount() == 0 || input.size() <= 1) {
+            return Optional.empty();
+        }
+
+        // Sort compaction may move a rewritten group before non-rewritten 
manifests. Even though
+        // rewriteGroups keeps the input order inside each group, it cannot 
preserve ordering
+        // dependencies between a DELETE entry and an ADD entry in manifests 
outside the group.
+        for (ManifestFileMeta file : input) {
+            if (file.numDeletedFiles() > 0) {
+                return Optional.empty();
+            }
+        }
+
+        RecordComparator partitionComparator =
+                
CodeGenUtils.newRecordComparator(partitionType.getFieldTypes());
+        List<ManifestFileMeta> compactCandidates = compactCandidates(input, 
partitionComparator);
+        if (compactCandidates.size() <= 1) {
+            return Optional.empty();
+        }
+
+        List<ManifestSortedRun> runs = partitionSortedRuns(compactCandidates, 
partitionComparator);
+        if (runs.size() <= 1) {
+            return Optional.empty();
+        }
+
+        List<ManifestSortedRun> pickedRuns = pickRuns(runs, 
rewriteManifestCount);
+        if (pickedRuns.isEmpty()) {
+            return Optional.empty();
+        }
+
+        List<ManifestFileMeta> pickedFiles = new ArrayList<>();
+        for (ManifestSortedRun run : pickedRuns) {
+            pickedFiles.addAll(run.files);
+        }
+        List<List<ManifestFileMeta>> rewriteGroups =
+                rewriteGroups(pickedFiles, input, partitionComparator);
+        LinkedHashSet<String> rewriteFileNames = new LinkedHashSet<>();
+        for (List<ManifestFileMeta> rewriteGroup : rewriteGroups) {
+            addFileNames(rewriteGroup, rewriteFileNames);
+        }
+        if (rewriteFileNames.size() <= 1) {
+            return Optional.empty();
+        }
+
+        LOG.info(
+                "Start Manifest File Sort Compaction: sortedRuns: {}, 
pickedRuns: {}, pickedFiles: {}",
+                runs.size(),
+                pickedRuns.size(),
+                pickedFiles.size());
+
+        int insertPos = -1;
+        for (int i = 0; i < input.size(); i++) {
+            ManifestFileMeta file = input.get(i);
+            if (rewriteFileNames.contains(file.fileName())) {
+                insertPos = i;
+                break;
+            }
+        }
+
+        List<ManifestFileMeta> rewritten = new ArrayList<>();
+        for (List<ManifestFileMeta> rewriteGroup : rewriteGroups) {
+            Map<FileEntry.Identifier, ManifestEntry> map = new 
LinkedHashMap<>();

Review Comment:
   If a 1KB manifest and many 8MB manifests are continuously picked together,
I think the write amplification of the sort rewrite is a bit large.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to