codope commented on code in PR #6498:
URL: https://github.com/apache/hudi/pull/6498#discussion_r978718640
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String>
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
* @return list of partitions
* @throws IOException
*/
Review Comment:
Can you also add more details in the javadoc for this method?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String>
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
* @return list of partitions
* @throws IOException
*/
- private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain)
throws IOException {
- if (!instantToRetain.isPresent()) {
- LOG.info("No earliest commit to retain. No need to scan partitions !!");
- return Collections.emptyList();
- }
+ private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain,
HoodieCleaningPolicy cleaningPolicy) throws IOException {
+ if (cleaningPolicy != HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
+ if (!instantToRetain.isPresent()) {
+ LOG.info("No earliest commit to retain. No need to scan partitions
!!");
+ return Collections.emptyList();
+ }
- if (config.incrementalCleanerModeEnabled()) {
- Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
- if (lastClean.isPresent()) {
- if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
-
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
- } else {
- HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+ if (config.incrementalCleanerModeEnabled()) {
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
+ if ((cleanMetadata.getEarliestCommitToRetain() != null)
+ && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
+ return
getPartitionPathsForIncrementalCleaning(cleanMetadata.getEarliestCommitToRetain(),
instantToRetain);
+ }
+ }
+ }
+ }
+ } else {
+ // FILE_VERSIONS_RETAINED
+ if (config.incrementalCleanerModeEnabled()) {
+ // find the last completed commit from last clean and mark that as
earliest commit to retain.
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = null;
+ try {
+ cleanMetadata = TimelineMetadataUtils
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
- if ((cleanMetadata.getEarliestCommitToRetain() != null)
- && (cleanMetadata.getEarliestCommitToRetain().length() > 0))
{
- return getPartitionPathsForIncrementalCleaning(cleanMetadata,
instantToRetain);
+ if
(!StringUtils.isNullOrEmpty(cleanMetadata.getLastCompletedCommitTimestamp())) {
Review Comment:
Is it possible that there are any inflight commits before
`lastCompletedCommitTimestamp`?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String>
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
* @return list of partitions
* @throws IOException
*/
- private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain)
throws IOException {
- if (!instantToRetain.isPresent()) {
- LOG.info("No earliest commit to retain. No need to scan partitions !!");
- return Collections.emptyList();
- }
+ private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain,
HoodieCleaningPolicy cleaningPolicy) throws IOException {
+ if (cleaningPolicy != HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
+ if (!instantToRetain.isPresent()) {
+ LOG.info("No earliest commit to retain. No need to scan partitions
!!");
+ return Collections.emptyList();
+ }
- if (config.incrementalCleanerModeEnabled()) {
- Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
- if (lastClean.isPresent()) {
- if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
-
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
- } else {
- HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+ if (config.incrementalCleanerModeEnabled()) {
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
+ if ((cleanMetadata.getEarliestCommitToRetain() != null)
+ && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
+ return
getPartitionPathsForIncrementalCleaning(cleanMetadata.getEarliestCommitToRetain(),
instantToRetain);
+ }
+ }
+ }
+ }
+ } else {
+ // FILE_VERSIONS_RETAINED
+ if (config.incrementalCleanerModeEnabled()) {
+ // find the last completed commit from last clean and mark that as
earliest commit to retain.
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = null;
Review Comment:
nit: remove redundant initializer. If we do that, then no need of
effectively final variable below.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java:
##########
@@ -148,23 +148,52 @@ public List<String>
getPartitionPathsToClean(Option<HoodieInstant> earliestRetai
* @return list of partitions
* @throws IOException
*/
- private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain)
throws IOException {
- if (!instantToRetain.isPresent()) {
- LOG.info("No earliest commit to retain. No need to scan partitions !!");
- return Collections.emptyList();
- }
+ private List<String>
getPartitionPathsForCleanByCommits(Option<HoodieInstant> instantToRetain,
HoodieCleaningPolicy cleaningPolicy) throws IOException {
+ if (cleaningPolicy != HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) {
+ if (!instantToRetain.isPresent()) {
+ LOG.info("No earliest commit to retain. No need to scan partitions
!!");
+ return Collections.emptyList();
+ }
- if (config.incrementalCleanerModeEnabled()) {
- Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
- if (lastClean.isPresent()) {
- if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
-
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
- } else {
- HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+ if (config.incrementalCleanerModeEnabled()) {
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = TimelineMetadataUtils
+
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
+ if ((cleanMetadata.getEarliestCommitToRetain() != null)
+ && (cleanMetadata.getEarliestCommitToRetain().length() > 0)) {
+ return
getPartitionPathsForIncrementalCleaning(cleanMetadata.getEarliestCommitToRetain(),
instantToRetain);
+ }
+ }
+ }
+ }
+ } else {
+ // FILE_VERSIONS_RETAINED
+ if (config.incrementalCleanerModeEnabled()) {
+ // find the last completed commit from last clean and mark that as
earliest commit to retain.
+ Option<HoodieInstant> lastClean =
hoodieTable.getCleanTimeline().filterCompletedInstants().lastInstant();
+ if (lastClean.isPresent()) {
+ if (hoodieTable.getActiveTimeline().isEmpty(lastClean.get())) {
+
hoodieTable.getActiveTimeline().deleteEmptyInstantIfExists(lastClean.get());
+ } else {
+ HoodieCleanMetadata cleanMetadata = null;
+ try {
+ cleanMetadata = TimelineMetadataUtils
.deserializeHoodieCleanMetadata(hoodieTable.getActiveTimeline().getInstantDetails(lastClean.get()).get());
- if ((cleanMetadata.getEarliestCommitToRetain() != null)
- && (cleanMetadata.getEarliestCommitToRetain().length() > 0))
{
- return getPartitionPathsForIncrementalCleaning(cleanMetadata,
instantToRetain);
+ if
(!StringUtils.isNullOrEmpty(cleanMetadata.getLastCompletedCommitTimestamp())) {
+ HoodieCleanMetadata finalCleanMetadata = cleanMetadata;
+ Option<HoodieInstant> lastCompletedInstantDuringLastClean =
hoodieTable.getActiveTimeline().filterCompletedInstants()
+ .filter(entry ->
entry.getTimestamp().equals(finalCleanMetadata.getLastCompletedCommitTimestamp())).firstInstant();
Review Comment:
```suggestion
Option<HoodieInstant> lastCompletedInstantDuringLastClean =
hoodieTable.getActiveTimeline().filterCompletedInstants()
.filter(entry ->
entry.getTimestamp().equals(cleanMetadata.getLastCompletedCommitTimestamp())).firstInstant();
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]