felipecrv commented on code in PR #40075:
URL: https://github.com/apache/arrow/pull/40075#discussion_r1491877474


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -2055,6 +2035,116 @@ class AzureFileSystem::Impl {
   static constexpr auto kTimeNeededForFileOrDirectoryRename = 
std::chrono::seconds{3};
 
  public:
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  /// \pre location.path does not end with a trailing slash.
+  Status DeleteFileOnFileSystem(const DataLake::DataLakeFileSystemClient& 
adlfs_client,
+                                const AzureLocation& location, bool 
require_file_to_exist,
+                                Azure::Nullable<std::string> lease_id = {}) {
+    DCHECK(!location.container.empty());
+    DCHECK(!location.path.empty());
+    DCHECK(!internal::HasTrailingSlash(location.path));
+    auto file_client = adlfs_client.GetFileClient(location.path);
+    DataLake::GetPathPropertiesOptions get_options;
+    get_options.AccessConditions.LeaseId = lease_id;
+    DataLake::DeleteFileOptions delete_options;
+    delete_options.AccessConditions.LeaseId = std::move(lease_id);
+    try {
+      // This is necessary to avoid deletion of directories via DeleteFile.
+      auto properties = file_client.GetProperties(get_options);
+      if (properties.Value.IsDirectory) {
+        return internal::NotAFile(location.all);
+      }
+      auto response = file_client.Delete(delete_options);
+      // Only the "*IfExists" functions ever set Deleted to false.
+      // All the others either succeed or throw an exception.
+      DCHECK(response.Value.Deleted);
+    } catch (const Storage::StorageException& exception) {
+      if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
+        // ErrorCode can be "FilesystemNotFound", "PathNotFound"...
+        if (require_file_to_exist) {
+          return PathNotFound(location);
+        }
+        return Status::OK();
+      }
+      return ExceptionToStatus(exception, "Failed to delete a file: ", 
location.path,
+                               ": ", file_client.GetUrl());
+    }
+    return Status::OK();
+  }
+
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  /// \pre location.path does not end with a trailing slash.
+  Status DeleteFileOnContainer(const Blobs::BlobContainerClient& 
container_client,
+                               const AzureLocation& location, bool 
require_file_to_exist,
+                               const char* operation) {
+    DCHECK(!location.container.empty());
+    DCHECK(!location.path.empty());
+    DCHECK(!internal::HasTrailingSlash(location.path));
+    constexpr auto kFileBlobLeaseTime = std::chrono::seconds{15};
+
+    // If the parent directory of a file is not the container itself, there is 
a
+    // risk that deleting the file also deletes the *implied directory* -- the
+    // directory that is implied by the existence of the file path.
+    //
+    // In this case, we must ensure that the deletion is not semantically
+    // equivalent to also deleting the directory. This is done by ensuring the
+    // directory marker blob exists before the file is deleted.
+    std::optional<LeaseGuard> file_blob_lease_guard;
+    const auto parent = location.parent();
+    if (!parent.path.empty()) {
+      // We have to check the existence of the file before checking the
+      // existence of the parent directory marker, so we acquire a lease on the
+      // file first.
+      ARROW_ASSIGN_OR_RAISE(auto file_blob_lease_client,
+                            AcquireBlobLease(location, kFileBlobLeaseTime,
+                                             
/*allow_missing=*/!require_file_to_exist));
+      if (file_blob_lease_client) {
+        file_blob_lease_guard.emplace(std::move(file_blob_lease_client),
+                                      kFileBlobLeaseTime);
+        // Ensure the empty directory marker blob exists before the file is 
deleted.
+        //
+        // There is not need to hold a lease on the directory marker because if
+        // a concurrent client deletes the directory marker right after we
+        // create it, the file deletion itself won't be the cause of the 
directory
+        // deletion. Additionally, the fact that a lease is held on the blob 
path
+        // semantically preserves the directory -- its existence is implied
+        // until the blob representing the file is deleted -- even if another
+        // client deletes the directory marker.
+        RETURN_NOT_OK(EnsureEmptyDirExists(container_client, parent, 
operation));
+      } else {
+        // File blob doesn't exist and require_file_to_exists is false, so 
just return OK.
+        // Trying to delete a file that doesn't exist, costs only one request 
to the
+        // server.
+        DCHECK(!require_file_to_exist);
+        return Status::OK();
+      }
+    }
+
+    auto blob_client = container_client.GetBlobClient(location.path);
+    Blobs::DeleteBlobOptions options;
+    if (file_blob_lease_guard) {
+      options.AccessConditions.LeaseId = file_blob_lease_guard->LeaseId();
+    }
+    try {
+      auto response = blob_client.Delete(options);

Review Comment:
   Alright. Adding a comment on `BreakBeforeDeletion`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to