Tom-Newton commented on code in PR #39361:
URL: https://github.com/apache/arrow/pull/39361#discussion_r1436544742


##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1288,97 +1328,81 @@ class AzureFileSystem::Impl {
     return ptr;
   }
 
-  Status CreateDir(const AzureLocation& location) {
-    if (location.container.empty()) {
-      return Status::Invalid("CreateDir requires a non-empty path.");
-    }
-
-    auto container_client =
-        blob_service_client_->GetBlobContainerClient(location.container);
-    if (location.path.empty()) {
-      try {
-        auto response = container_client.Create();
-        return response.Value.Created
-                   ? Status::OK()
-                   : Status::AlreadyExists("Directory already exists: " + 
location.all);
-      } catch (const Storage::StorageException& exception) {
-        return ExceptionToStatus(exception,
-                                 "Failed to create a container: ", 
location.container,
-                                 ": ", container_client.GetUrl());
-      }
-    }
-
-    auto adlfs_client = 
datalake_service_client_->GetFileSystemClient(location.container);
-    ARROW_ASSIGN_OR_RAISE(auto hns_support, 
HierarchicalNamespaceSupport(adlfs_client));
-    if (hns_support == HNSSupport::kContainerNotFound) {
-      return PathNotFound(location);
-    }
-    if (hns_support == HNSSupport::kDisabled) {
-      ARROW_ASSIGN_OR_RAISE(
-          auto container_info,
-          GetContainerPropsAsFileInfo(location.container, container_client));
-      if (container_info.type() == FileType::NotFound) {
-        return PathNotFound(location);
-      }
-      // Without hierarchical namespace enabled Azure blob storage has no 
directories.
-      // Therefore we can't, and don't need to create one. Simply creating a 
blob with `/`
-      // in the name implies directories.
-      return Status::OK();
-    }
-
-    auto directory_client = adlfs_client.GetDirectoryClient(location.path);
-    try {
-      auto response = directory_client.Create();
-      if (response.Value.Created) {
-        return Status::OK();
-      } else {
-        return StatusFromErrorResponse(directory_client.GetUrl(), 
*response.RawResponse,
-                                       "Failed to create a directory: " + 
location.path);
+ private:
+  /// This function cannot assume the filesystem/container already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  template <class ContainerClient, class GetDirectoryClient, class 
CreateDirIfNotExists>
+  Status CreateDirTemplate(const ContainerClient& container_client,
+                           GetDirectoryClient&& get_directory_client,
+                           CreateDirIfNotExists&& create_if_not_exists,
+                           const AzureLocation& location, bool recursive) {
+    DCHECK(!location.container.empty());
+    DCHECK(!location.path.empty());
+    // Non-recursive CreateDir calls require the parent directory to exist.
+    if (!recursive) {
+      auto parent = location.parent();
+      if (!parent.path.empty()) {
+        RETURN_NOT_OK(CheckDirExists(container_client, parent));
       }
-    } catch (const Storage::StorageException& exception) {
-      return ExceptionToStatus(exception, "Failed to create a directory: ", 
location.path,
-                               ": ", directory_client.GetUrl());
-    }
-  }
-
-  Status CreateDirRecursive(const AzureLocation& location) {
-    if (location.container.empty()) {
-      return Status::Invalid("CreateDir requires a non-empty path.");
+      // If the parent location is just the container, we don't need to check 
if it
+      // exists because the operation we perform below will fail if the 
container
+      // doesn't exist and we can handle that error according to the recursive 
flag.
     }
-
-    auto container_client =
-        blob_service_client_->GetBlobContainerClient(location.container);
+    auto directory_client = get_directory_client(container_client, location);
     try {
-      container_client.CreateIfNotExists();
-    } catch (const Storage::StorageException& exception) {
-      return ExceptionToStatus(exception,
-                               "Failed to create a container: ", 
location.container, " (",
-                               container_client.GetUrl(), ")");
-    }
-
-    auto adlfs_client = 
datalake_service_client_->GetFileSystemClient(location.container);
-    ARROW_ASSIGN_OR_RAISE(auto hns_support, 
HierarchicalNamespaceSupport(adlfs_client));
-    if (hns_support == HNSSupport::kDisabled) {
-      // Without hierarchical namespace enabled Azure blob storage has no 
directories.
-      // Therefore we can't, and don't need to create one. Simply creating a 
blob with `/`
-      // in the name implies directories.
+      create_if_not_exists(directory_client);
       return Status::OK();
-    }
-    // Don't handle HNSSupport::kContainerNotFound, just assume it still 
exists (because
-    // it was created above) and try to create the directory.
-
-    if (!location.path.empty()) {
-      auto directory_client = adlfs_client.GetDirectoryClient(location.path);
-      try {
-        directory_client.CreateIfNotExists();
-      } catch (const Storage::StorageException& exception) {
-        return ExceptionToStatus(exception,
-                                 "Failed to create a directory: ", 
location.path, " (",
-                                 directory_client.GetUrl(), ")");
+    } catch (const Storage::StorageException& exception) {
+      if (IsContainerNotFound(exception)) {
+        try {
+          if (recursive) {
+            container_client.CreateIfNotExists();
+            create_if_not_exists(directory_client);
+            return Status::OK();
+          } else {
+            auto parent = location.parent();
+            return PathNotFound(parent);
+          }
+        } catch (const Storage::StorageException& second_exception) {
+          return ExceptionToStatus(second_exception);
+        }
       }
+      return ExceptionToStatus(exception);
     }
+  }
 
-    return Status::OK();
+ public:
+  /// This function cannot assume the filesystem already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  Status CreateDirOnFileSystem(const DataLake::DataLakeFileSystemClient& 
adlfs_client,
+                               const AzureLocation& location, bool recursive) {
+    return CreateDirTemplate(
+        adlfs_client,
+        [](auto& adlfs_client, auto& location) {
+          return adlfs_client.GetDirectoryClient(location.path);
+        },
+        [](auto& directory_client) { directory_client.CreateIfNotExists(); }, 
location,
+        recursive);
+  }
+
+  /// This function cannot assume the container already exists.
+  ///
+  /// \pre location.container is not empty.
+  /// \pre location.path is not empty.
+  Status CreateDirOnContainer(const Blobs::BlobContainerClient& 
container_client,
+                              const AzureLocation& location, bool recursive) {
+    return CreateDirTemplate(
+        container_client,
+        [](auto& container_client, auto& location) {
+          auto dir_marker_blob_path = 
internal::EnsureTrailingSlash(location.path);
+          return 
container_client.GetBlobClient(dir_marker_blob_path).AsBlockBlobClient();
+        },
+        [](auto& block_blob_client) { block_blob_client.UploadFrom(nullptr, 
0); },

Review Comment:
   Sorry, in this case I mean the adlfs fsspec implementation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to