Tom-Newton commented on code in PR #39361:
URL: https://github.com/apache/arrow/pull/39361#discussion_r1439758695
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1288,97 +1328,81 @@ class AzureFileSystem::Impl {
return ptr;
}
- Status CreateDir(const AzureLocation& location) {
- if (location.container.empty()) {
- return Status::Invalid("CreateDir requires a non-empty path.");
- }
-
- auto container_client =
- blob_service_client_->GetBlobContainerClient(location.container);
- if (location.path.empty()) {
- try {
- auto response = container_client.Create();
- return response.Value.Created
- ? Status::OK()
- : Status::AlreadyExists("Directory already exists: " +
location.all);
- } catch (const Storage::StorageException& exception) {
- return ExceptionToStatus(exception,
- "Failed to create a container: ",
location.container,
- ": ", container_client.GetUrl());
- }
- }
-
- auto adlfs_client =
datalake_service_client_->GetFileSystemClient(location.container);
- ARROW_ASSIGN_OR_RAISE(auto hns_support,
HierarchicalNamespaceSupport(adlfs_client));
- if (hns_support == HNSSupport::kContainerNotFound) {
- return PathNotFound(location);
- }
- if (hns_support == HNSSupport::kDisabled) {
- ARROW_ASSIGN_OR_RAISE(
- auto container_info,
- GetContainerPropsAsFileInfo(location.container, container_client));
- if (container_info.type() == FileType::NotFound) {
- return PathNotFound(location);
- }
- // Without hierarchical namespace enabled Azure blob storage has no
directories.
- // Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
- // in the name implies directories.
- return Status::OK();
- }
-
- auto directory_client = adlfs_client.GetDirectoryClient(location.path);
- try {
- auto response = directory_client.Create();
- if (response.Value.Created) {
- return Status::OK();
- } else {
- return StatusFromErrorResponse(directory_client.GetUrl(),
*response.RawResponse,
- "Failed to create a directory: " +
location.path);
+ private:
+ /// This function cannot assume the filesystem/container already exists.
+ ///
+ /// \pre location.container is not empty.
+ /// \pre location.path is not empty.
+ template <class ContainerClient, class GetDirectoryClient, class
CreateDirIfNotExists>
+ Status CreateDirTemplate(const ContainerClient& container_client,
+ GetDirectoryClient&& get_directory_client,
+ CreateDirIfNotExists&& create_if_not_exists,
+ const AzureLocation& location, bool recursive) {
+ DCHECK(!location.container.empty());
+ DCHECK(!location.path.empty());
+ // Non-recursive CreateDir calls require the parent directory to exist.
+ if (!recursive) {
+ auto parent = location.parent();
+ if (!parent.path.empty()) {
+ RETURN_NOT_OK(CheckDirExists(container_client, parent));
}
- } catch (const Storage::StorageException& exception) {
- return ExceptionToStatus(exception, "Failed to create a directory: ",
location.path,
- ": ", directory_client.GetUrl());
- }
- }
-
- Status CreateDirRecursive(const AzureLocation& location) {
- if (location.container.empty()) {
- return Status::Invalid("CreateDir requires a non-empty path.");
+ // If the parent location is just the container, we don't need to check
if it
+ // exists because the operation we perform below will fail if the
container
+ // doesn't exist and we can handle that error according to the recursive
flag.
}
-
- auto container_client =
- blob_service_client_->GetBlobContainerClient(location.container);
+ auto directory_client = get_directory_client(container_client, location);
try {
- container_client.CreateIfNotExists();
- } catch (const Storage::StorageException& exception) {
- return ExceptionToStatus(exception,
- "Failed to create a container: ",
location.container, " (",
- container_client.GetUrl(), ")");
- }
-
- auto adlfs_client =
datalake_service_client_->GetFileSystemClient(location.container);
- ARROW_ASSIGN_OR_RAISE(auto hns_support,
HierarchicalNamespaceSupport(adlfs_client));
- if (hns_support == HNSSupport::kDisabled) {
- // Without hierarchical namespace enabled Azure blob storage has no
directories.
- // Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
- // in the name implies directories.
+ create_if_not_exists(directory_client);
return Status::OK();
- }
- // Don't handle HNSSupport::kContainerNotFound, just assume it still
exists (because
- // it was created above) and try to create the directory.
-
- if (!location.path.empty()) {
- auto directory_client = adlfs_client.GetDirectoryClient(location.path);
- try {
- directory_client.CreateIfNotExists();
- } catch (const Storage::StorageException& exception) {
- return ExceptionToStatus(exception,
- "Failed to create a directory: ",
location.path, " (",
- directory_client.GetUrl(), ")");
+ } catch (const Storage::StorageException& exception) {
+ if (IsContainerNotFound(exception)) {
+ try {
+ if (recursive) {
+ container_client.CreateIfNotExists();
+ create_if_not_exists(directory_client);
+ return Status::OK();
+ } else {
+ auto parent = location.parent();
+ return PathNotFound(parent);
+ }
+ } catch (const Storage::StorageException& second_exception) {
+ return ExceptionToStatus(second_exception);
+ }
}
+ return ExceptionToStatus(exception);
}
+ }
- return Status::OK();
+ public:
+ /// This function cannot assume the filesystem already exists.
+ ///
+ /// \pre location.container is not empty.
+ /// \pre location.path is not empty.
+ Status CreateDirOnFileSystem(const DataLake::DataLakeFileSystemClient&
adlfs_client,
+ const AzureLocation& location, bool recursive) {
+ return CreateDirTemplate(
+ adlfs_client,
+ [](auto& adlfs_client, auto& location) {
+ return adlfs_client.GetDirectoryClient(location.path);
+ },
+ [](auto& directory_client) { directory_client.CreateIfNotExists(); },
location,
+ recursive);
+ }
+
+ /// This function cannot assume the container already exists.
+ ///
+ /// \pre location.container is not empty.
+ /// \pre location.path is not empty.
+ Status CreateDirOnContainer(const Blobs::BlobContainerClient&
container_client,
+ const AzureLocation& location, bool recursive) {
+ return CreateDirTemplate(
+ container_client,
+ [](auto& container_client, auto& location) {
+ auto dir_marker_blob_path =
internal::EnsureTrailingSlash(location.path);
+ return
container_client.GetBlobClient(dir_marker_blob_path).AsBlockBlobClient();
+ },
+ [](auto& block_blob_client) { block_blob_client.UploadFrom(nullptr,
0); },
Review Comment:
More specifically I think we should add some metadata which will be
compatible such that
https://github.com/fsspec/adlfs/blob/32132c4094350fca2680155a5c236f2e9f991ba5/adlfs/spec.py#L855-L870
can correctly detect the directories.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]