This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new b01ae22236d6 fix: sort partitions after filtering for clustering planning (#18092)
b01ae22236d6 is described below
commit b01ae22236d626379fc893148f72c3dfbf138beb
Author: Prashant Wason <[email protected]>
AuthorDate: Tue Mar 10 20:38:05 2026 -0700
fix: sort partitions after filtering for clustering planning (#18092)
---
.../strategy/PartitionAwareClusteringPlanStrategy.java | 5 ++++-
.../TestSparkClusteringPlanPartitionFilter.java | 18 +++++++++---------
2 files changed, 13 insertions(+), 10 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
index f47be1d45572..077f1bb77e5f 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/strategy/PartitionAwareClusteringPlanStrategy.java
@@ -144,7 +144,10 @@ public abstract class PartitionAwareClusteringPlanStrategy<T,I,K,O> extends Clus
* Return list of partition paths to be considered for clustering.
*/
public Pair<List<String>, List<String>> filterPartitionPaths(HoodieWriteConfig writeConfig, List<String> partitions) {
- return ClusteringPlanPartitionFilter.filter(partitions, getWriteConfig());
+ Pair<List<String>, List<String>> result = ClusteringPlanPartitionFilter.filter(partitions, getWriteConfig());
+ result.getLeft().sort(String::compareTo);
+ log.debug("Filtered to the following partitions after sorting: {}", result.getLeft());
+ return result;
}
@Override
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkClusteringPlanPartitionFilter.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkClusteringPlanPartitionFilter.java
index e1856421f1a4..1b0e173a37ee 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkClusteringPlanPartitionFilter.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/cluster/strategy/TestSparkClusteringPlanPartitionFilter.java
@@ -32,10 +32,10 @@ import org.junit.jupiter.api.Test;
import org.mockito.Mock;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertSame;
public class TestSparkClusteringPlanPartitionFilter {
@Mock
@@ -70,8 +70,8 @@ public class TestSparkClusteringPlanPartitionFilter {
@Test
public void testFilterPartitionRecentDays() {
HoodieWriteConfig config =
hoodieWriteConfigBuilder.withClusteringConfig(HoodieClusteringConfig.newBuilder()
- .withClusteringSkipPartitionsFromLatest(1)
- .withClusteringTargetPartitions(1)
+ .withClusteringSkipPartitionsFromLatest(0)
+ .withClusteringTargetPartitions(2)
.withClusteringPlanPartitionFilterMode(ClusteringPlanPartitionFilterMode.RECENT_DAYS)
.build())
.build();
@@ -80,16 +80,16 @@ public class TestSparkClusteringPlanPartitionFilter {
ArrayList<String> fakeTimeBasedPartitionsPath = new ArrayList<>();
fakeTimeBasedPartitionsPath.add("20210718");
fakeTimeBasedPartitionsPath.add("20210716");
- fakeTimeBasedPartitionsPath.add("20210719");
+ fakeTimeBasedPartitionsPath.add("20210717");
List list = (List)sg.filterPartitionPaths(null,
fakeTimeBasedPartitionsPath).getLeft();
- assertEquals(1, list.size());
- assertSame("20210718", list.get(0));
+ assertEquals(2, list.size());
+ assertEquals(Arrays.asList("20210717", "20210718"), list);
}
@Test
public void testFilterPartitionSelectedPartitions() {
HoodieWriteConfig config =
hoodieWriteConfigBuilder.withClusteringConfig(HoodieClusteringConfig.newBuilder()
- .withClusteringPartitionFilterBeginPartition("20211222")
+ .withClusteringPartitionFilterBeginPartition("20211221")
.withClusteringPartitionFilterEndPartition("20211223")
.withClusteringPlanPartitionFilterMode(ClusteringPlanPartitionFilterMode.SELECTED_PARTITIONS)
.build())
@@ -102,8 +102,8 @@ public class TestSparkClusteringPlanPartitionFilter {
fakeTimeBasedPartitionsPath.add("20211222");
fakeTimeBasedPartitionsPath.add("20211224");
List list = (List)sg.filterPartitionPaths(config,
fakeTimeBasedPartitionsPath).getLeft();
- assertEquals(1, list.size());
- assertSame("20211222", list.get(0));
+ assertEquals(2, list.size());
+ assertEquals(Arrays.asList("20211221", "20211222"), list);
}
@Test