prashantwason commented on code in PR #17550:
URL: https://github.com/apache/hudi/pull/17550#discussion_r2730316468
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCleanConfig.java:
##########
@@ -201,6 +201,25 @@ public class HoodieCleanConfig extends HoodieConfig {
+ "table receives updates/deletes. Another reason to turn this on,
would be to ensure data residing in bootstrap "
+ "base files are also physically deleted, to comply with data
privacy enforcement processes.");
+ public static final ConfigProperty<String> CLEANER_PARTITION_FILTER_REGEX =
ConfigProperty
+ .key("hoodie.cleaner.partition.filter.regex")
+ .noDefaultValue()
+ .markAdvanced()
+ .sinceVersion("1.2.0")
+ .withDocumentation("When incremental clean is disabled, this regex can
be used to filter the partitions to be cleaned. "
+ + "Only partitions matching this regex pattern will be cleaned. "
+ + "This can be useful for very large tables to avoid OOM issues
during cleaning. "
+ + "If both this config and " +
"hoodie.cleaner.partition.filter.selected" + " are set, the selected partitions
take precedence.");
+
+ public static final ConfigProperty<String> CLEANER_PARTITION_FILTER_SELECTED
= ConfigProperty
+ .key("hoodie.cleaner.partition.filter.selected")
+ .noDefaultValue()
+ .markAdvanced()
+ .sinceVersion("1.2.0")
+ .withDocumentation("When incremental clean is disabled, this
comma-separated list of partitions can be used to filter the partitions to be
cleaned. "
+ + "Only the specified partitions will be cleaned. "
+ + "This can be useful for very large tables to avoid OOM issues
during cleaning. "
+ + "If both this config and " +
"hoodie.cleaner.partition.filter.regex" + " are set, the selected partitions
take precedence.");
Review Comment:
Done
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -260,11 +260,37 @@ private Stream<String>
getPartitionsForInstants(HoodieInstant instant) {
*/
private List<String> getPartitionPathsForFullCleaning() {
// Go to brute force mode of scanning all partitions
+ List<String> allPartitionPaths;
try {
- return hoodieTable.getMetadataTable().getAllPartitionPaths();
+ allPartitionPaths =
hoodieTable.getMetadataTable().getAllPartitionPaths();
} catch (IOException ioe) {
throw new HoodieIOException("Fetching all partitions failed ", ioe);
}
+
+ String partitionSelected = config.getCleanerPartitionFilterSelected();
+ String partitionRegex = config.getCleanerPartitionFilterRegex();
+
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]