felipecrv commented on code in PR #39361:
URL: https://github.com/apache/arrow/pull/39361#discussion_r1436529364


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1288,97 +1328,81 @@ class AzureFileSystem::Impl {
     return ptr;
   }
 
-  Status CreateDir(const AzureLocation& location) {
-    if (location.container.empty()) {
-      return Status::Invalid("CreateDir requires a non-empty path.");
-    }
-
-    auto container_client =
-        blob_service_client_->GetBlobContainerClient(location.container);
-    if (location.path.empty()) {
-      try {
-        auto response = container_client.Create();
-        return response.Value.Created
-                   ? Status::OK()
-                   : Status::AlreadyExists("Directory already exists: " + 
location.all);
-      } catch (const Storage::StorageException& exception) {
-        return ExceptionToStatus(exception,
-                                 "Failed to create a container: ", 
location.container,
-                                 ": ", container_client.GetUrl());
-      }
-    }
-
-    auto adlfs_client = 
datalake_service_client_->GetFileSystemClient(location.container);
-    ARROW_ASSIGN_OR_RAISE(auto hns_support, 
HierarchicalNamespaceSupport(adlfs_client));
-    if (hns_support == HNSSupport::kContainerNotFound) {
-      return PathNotFound(location);
-    }
-    if (hns_support == HNSSupport::kDisabled) {
-      ARROW_ASSIGN_OR_RAISE(
-          auto container_info,
-          GetContainerPropsAsFileInfo(location.container, container_client));
-      if (container_info.type() == FileType::NotFound) {
-        return PathNotFound(location);
-      }
-      // Without hierarchical namespace enabled Azure blob storage has no 
directories.
-      // Therefore we can't, and don't need to create one. Simply creating a 
blob with `/`
-      // in the name implies directories.
-      return Status::OK();
-    }
-
-    auto directory_client = adlfs_client.GetDirectoryClient(location.path);
-    try {
-      auto response = directory_client.Create();
-      if (response.Value.Created) {
-        return Status::OK();
-      } else {
-        return StatusFromErrorResponse(directory_client.GetUrl(), 
*response.RawResponse,
-                                       "Failed to create a directory: " + 
location.path);
+ private:
+  /// This function cannot assume the filesystem/container already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  template <class ContainerClient, class GetDirectoryClient, class 
CreateDirIfNotExists>
+  Status CreateDirTemplate(const ContainerClient& container_client,
+                           GetDirectoryClient&& get_directory_client,
+                           CreateDirIfNotExists&& create_if_not_exists,
+                           const AzureLocation& location, bool recursive) {
+    DCHECK(!location.container.empty());
+    DCHECK(!location.path.empty());
+    // Non-recursive CreateDir calls require the parent directory to exist.
+    if (!recursive) {
+      auto parent = location.parent();
+      if (!parent.path.empty()) {
+        RETURN_NOT_OK(CheckDirExists(container_client, parent));
       }
-    } catch (const Storage::StorageException& exception) {
-      return ExceptionToStatus(exception, "Failed to create a directory: ", 
location.path,
-                               ": ", directory_client.GetUrl());
-    }
-  }
-
-  Status CreateDirRecursive(const AzureLocation& location) {
-    if (location.container.empty()) {
-      return Status::Invalid("CreateDir requires a non-empty path.");
+      // If the parent location is just the container, we don't need to check 
if it
+      // exists because the operation we perform below will fail if the 
container
+      // doesn't exist and we can handle that error according to the recursive 
flag.
     }
-
-    auto container_client =
-        blob_service_client_->GetBlobContainerClient(location.container);
+    auto directory_client = get_directory_client(container_client, location);
     try {
-      container_client.CreateIfNotExists();
-    } catch (const Storage::StorageException& exception) {
-      return ExceptionToStatus(exception,
-                               "Failed to create a container: ", 
location.container, " (",
-                               container_client.GetUrl(), ")");
-    }
-
-    auto adlfs_client = 
datalake_service_client_->GetFileSystemClient(location.container);
-    ARROW_ASSIGN_OR_RAISE(auto hns_support, 
HierarchicalNamespaceSupport(adlfs_client));
-    if (hns_support == HNSSupport::kDisabled) {
-      // Without hierarchical namespace enabled Azure blob storage has no 
directories.
-      // Therefore we can't, and don't need to create one. Simply creating a 
blob with `/`
-      // in the name implies directories.
+      create_if_not_exists(directory_client);
       return Status::OK();
-    }
-    // Don't handle HNSSupport::kContainerNotFound, just assume it still 
exists (because
-    // it was created above) and try to create the directory.
-
-    if (!location.path.empty()) {
-      auto directory_client = adlfs_client.GetDirectoryClient(location.path);
-      try {
-        directory_client.CreateIfNotExists();
-      } catch (const Storage::StorageException& exception) {
-        return ExceptionToStatus(exception,
-                                 "Failed to create a directory: ", 
location.path, " (",
-                                 directory_client.GetUrl(), ")");
+    } catch (const Storage::StorageException& exception) {
+      if (IsContainerNotFound(exception)) {
+        try {
+          if (recursive) {
+            container_client.CreateIfNotExists();
+            create_if_not_exists(directory_client);
+            return Status::OK();
+          } else {
+            auto parent = location.parent();
+            return PathNotFound(parent);
+          }
+        } catch (const Storage::StorageException& second_exception) {
+          return ExceptionToStatus(second_exception);
+        }
       }
+      return ExceptionToStatus(exception);
     }
+  }
 
-    return Status::OK();
+ public:
+  /// This function cannot assume the filesystem already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  Status CreateDirOnFileSystem(const DataLake::DataLakeFileSystemClient& 
adlfs_client,
+                               const AzureLocation& location, bool recursive) {
+    return CreateDirTemplate(
+        adlfs_client,
+        [](auto& adlfs_client, auto& location) {
+          return adlfs_client.GetDirectoryClient(location.path);
+        },
+        [](auto& directory_client) { directory_client.CreateIfNotExists(); }, 
location,
+        recursive);
+  }
+
+  /// This function cannot assume the container already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  Status CreateDirOnContainer(const Blobs::BlobContainerClient& 
container_client,
+                              const AzureLocation& location, bool recursive) {
+    return CreateDirTemplate(
+        container_client,
+        [](auto& container_client, auto& location) {
+          auto dir_marker_blob_path = 
internal::EnsureTrailingSlash(location.path);
+          return 
container_client.GetBlobClient(dir_marker_blob_path).AsBlockBlobClient();
+        },
+        [](auto& block_blob_client) { block_blob_client.UploadFrom(nullptr, 
0); },

Review Comment:
   Sure. I would like to be as compatible as possible with existing specs. But 
to clarify, what do you mean when you say `adlfs`? Don't you mean `abfs` 
instead [1]?
   
   [1] 
https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java
   
   I think there is a confusion between `abfs` (filesystem semantics over the 
Blobs API) and `adlfs` (the separate service that Azure created that can also 
be accessed via the Blobs API with some restrictions).
   
   The `fsspec` repo seems to be linking both `abfs` and `adlfs` to the same 
thing, but they shouldn't be considered the same thing: 
https://github.com/fsspec/filesystem_spec/blob/0ffe06cb767456b7c13904b57ec1c3ca60d53eae/docs/source/api.rst?plain=1#L229
   
   Tell me what I'm missing here because my experience with Azure is restricted 
just to reading the docs (a lot of them) recently.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to