github-actions[bot] commented on code in PR #64269:
URL: https://github.com/apache/doris/pull/64269#discussion_r3378040654
##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataOps.java:
##########
@@ -427,7 +439,98 @@ private void performDropTable(String remoteDbName, String
remoteTblName, boolean
ErrorReport.reportDdlException(ErrorCode.ERR_UNKNOWN_TABLE,
remoteTblName, remoteDbName);
}
}
- catalog.dropTable(getTableIdentifier(remoteDbName, remoteTblName),
true);
+ TableIdentifier tableIdentifier = getTableIdentifier(remoteDbName,
remoteTblName);
+ Optional<String> tableLocation = shouldCleanupManagedLocation()
+ ? loadTableLocation(tableIdentifier) : Optional.empty();
+ catalog.dropTable(tableIdentifier, true);
+ tableLocation.ifPresent(location -> cleanupEmptyLocation(location,
"table", remoteDbName + "." + remoteTblName));
+ }
+
+ private Optional<String> loadNamespaceLocation(Namespace namespace) {
+ Map<String, String> namespaceMetadata =
nsCatalog.loadNamespaceMetadata(namespace);
+ String location = namespaceMetadata.get(NAMESPACE_LOCATION_PROP);
+ return StringUtils.isBlank(location) ? Optional.empty() :
Optional.of(location);
+ }
+
+ private Optional<String> loadTableLocation(TableIdentifier
tableIdentifier) {
+ String location = catalog.loadTable(tableIdentifier).location();
+ return StringUtils.isBlank(location) ? Optional.empty() :
Optional.of(location);
+ }
+
+ private void cleanupEmptyLocation(String location, String objectType,
String objectName) {
+ if (!shouldCleanupManagedLocation() || StringUtils.isBlank(location)) {
+ return;
+ }
+ try (FileSystem fs = createCleanupFileSystem()) {
+ boolean deleted = deleteEmptyDirectory(fs, Location.of(location));
+ if (deleted) {
+ LOG.info("Cleaned empty Iceberg {} location {} for {}",
objectType, location, objectName);
+ } else {
+ LOG.info("Skip cleaning Iceberg {} location {} for {}, because
it still contains files",
+ objectType, location, objectName);
+ }
+ } catch (Exception e) {
+ LOG.warn("Failed to clean Iceberg {} location {} for {} after
drop",
+ objectType, location, objectName, e);
+ }
+ }
+
+ private boolean shouldCleanupManagedLocation() {
+ return dorisCatalog instanceof IcebergExternalCatalog
+ && IcebergExternalCatalog.ICEBERG_HMS.equals(
+ ((IcebergExternalCatalog)
dorisCatalog).getIcebergCatalogType());
+ }
+
+ @VisibleForTesting
+ protected FileSystem createCleanupFileSystem() {
+ return new
SpiSwitchingFileSystem(dorisCatalog.getCatalogProperty().getStoragePropertiesMap());
+ }
+
+ @VisibleForTesting
+ static boolean deleteEmptyDirectory(FileSystem fs, Location location)
throws IOException {
+ if (!fs.exists(location)) {
+ return true;
+ }
+ boolean containsFile = false;
+ boolean deletedChildren = true;
+ List<Location> childDirectories = new ArrayList<>();
+ try (FileIterator iterator = fs.list(location)) {
+ while (iterator.hasNext()) {
Review Comment:
This recursion assumes `FileSystem.list()` exposes child directories so
`entry.isDirectory()` can drive traversal. That is not true for all filesystems
used by HMS Iceberg locations: `S3FileSystem.list()` returns a flat object
listing and skips directory-marker objects, so an empty location that contains
only nested markers such as `s3://bucket/warehouse/db/t/data/` and
`.../metadata/` will collect no `childDirectories`, `hasChildren()` also
returns false, and `deleteDirectory()` deletes only the top marker. The nested
markers remain, so `exists(location)` can still be true while the code logs
successful cleanup. Please use filesystem operations that match object-store
semantics as well, for example delete all marker/prefix children when there are
no real files, or add an object-store-aware empty-directory cleanup path and
cover it with a test.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]