huyuanfeng2018 commented on code in PR #4166:
URL: https://github.com/apache/flink-cdc/pull/4166#discussion_r2517099692


##########
flink-cdc-connect/flink-cdc-source-connectors/flink-cdc-base/src/main/java/org/apache/flink/cdc/connectors/base/utils/SourceRecordUtils.java:
##########
@@ -206,4 +175,165 @@ public static HistoryRecord getHistoryRecord(SourceRecord schemaRecord) throws I
         String historyRecordStr = value.getString(HISTORY_RECORD_FIELD);
         return new HistoryRecord(DOCUMENT_READER.read(historyRecordStr));
     }
+
+    /**
+     * Sorts the given finished snapshot splits by their splitStart boundary in ascending order. The
+     * first split (splitStart == null) is treated as negative infinity, and the last split
+     * (splitEnd == null) is treated as positive infinity.
+     */
+    public static void sortFinishedSplitInfos(List<FinishedSnapshotSplitInfo> splits) {
+        if (splits == null || splits.size() <= 1) {
+            return;
+        }
+
+        splits.sort(
+                (leftSplit, rightSplit) -> {
+                    Object[] leftSplitStart = leftSplit.getSplitStart();
+                    Object[] rightSplitStart = rightSplit.getSplitStart();
+
+                    if (leftSplitStart == null && rightSplitStart == null) {
+                        return 0;
+                    }
+                    if (leftSplitStart == null) {
+                        return -1;
+                    }
+                    if (rightSplitStart == null) {
+                        return 1;
+                    }
+
+                    return compareSplit(leftSplitStart, rightSplitStart);
+                });
+    }
+
+    /**
+     * Uses binary search to find the split containing the specified key in a sorted split list.
+     *
+     * <p>IMPORTANT: The splits list MUST be sorted by splitStart before calling this method. Use
+     * sortFinishedSplitInfos() to sort the list if needed.
+     *
+     * <p>To leverage data locality for append-heavy workloads (e.g. auto-increment PKs), this
+     * method checks the first and last splits before applying binary search to the remaining
+     * subset.
+     *
+     * @param sortedSplits List of splits sorted by splitStart (MUST be sorted!)
+     * @param key The chunk key to search for
+     * @return The split containing the key, or null if not found
+     */
+    public static FinishedSnapshotSplitInfo findSplitByKeyBinary(

Review Comment:
   Thanks for the quick review~
   
   done.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to