This is an automated email from the ASF dual-hosted git repository.
zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 158d6fff5 [GOBBLIN-1825] Hive retention job should fail if deleting
underlying files fails (#3687)
158d6fff5 is described below
commit 158d6fff546ee28ad7b542bfe78e5263af8843c6
Author: meethngala <[email protected]>
AuthorDate: Wed May 24 13:46:56 2023 -0700
[GOBBLIN-1825] Hive retention job should fail if deleting underlying files
fails (#3687)
* fail hive retention if we fail to delete underlying hdfs files
* address PR comments
* address PR comments
---------
Co-authored-by: Meeth Gala <[email protected]>
---
.../retention/version/HiveDatasetVersionCleaner.java | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java
index ffcc3b352..1363be2e0 100644
---
a/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java
+++
b/gobblin-data-management/src/main/java/org/apache/gobblin/data/management/retention/version/HiveDatasetVersionCleaner.java
@@ -88,20 +88,31 @@ public class HiveDatasetVersionCleaner extends
VersionCleaner {
Partition partition = hiveDatasetVersion.getPartition();
try {
if (!cleanableHiveDataset.isSimulate()) {
+ // As part of the cleanup process, we want to delete both: hive
partition and underlying hdfs files
+      // However, scenarios arise where the hive partition is dropped but the
hdfs files aren't deleted, leaving dangling files behind
+ // Thus, we reverse the order of cleaning up hdfs files first and
then drop hive partition
+ // In cases where HMS was unresponsive and hive partition couldn't
be dropped
+ // re-running hive retention would drop the partition with no hdfs
files found to be deleted
+      // or set the flag `shouldDeleteData` (read via `isShouldDeleteData()`) to false
+ if (cleanableHiveDataset.isShouldDeleteData()) {
+
cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion,
possiblyEmptyDirectories);
+ }
client.get().dropPartition(partition.getTable().getDbName(),
partition.getTable().getTableName(), partition.getValues(), false);
log.info("Successfully dropped partition " +
partition.getCompleteName());
} else {
log.info("Simulating drop partition " + partition.getCompleteName());
}
- if (cleanableHiveDataset.isShouldDeleteData()) {
-
cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion,
possiblyEmptyDirectories);
- }
} catch (TException | IOException e) {
log.warn(String.format("Failed to completely delete partition %s.",
partition.getCompleteName()), e);
throw new IOException(e);
}
}
-
cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories,
cleanableHiveDataset);
+ try {
+
cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories,
cleanableHiveDataset);
+ } catch (IOException ex) {
+ log.warn(String.format("Failed to delete at least one or more empty
directories from total:{%s} with root path %s",
possiblyEmptyDirectories.size(), cleanableHiveDataset.datasetRoot()), ex);
+ throw new IOException(ex);
+ }
}
@Override