felipecrv commented on code in PR #40567:
URL: https://github.com/apache/arrow/pull/40567#discussion_r1527392098
##########
cpp/src/arrow/filesystem/test_util.h:
##########
@@ -168,6 +168,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
virtual bool allow_write_file_over_dir() const { return false; }
// - Whether the filesystem allows reading a directory
virtual bool allow_read_dir_as_file() const { return false; }
+ // - Whether the filesystem allows moving a file
+ virtual bool allow_move_file() const { return true; }
Review Comment:
We need a way to run these generic tests against the real Azure Data Lake
environment.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1803,37 +1805,63 @@ class AzureFileSystem::Impl {
const AzureLocation& location, bool recursive) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
- // Non-recursive CreateDir calls require the parent directory to exist.
- if (!recursive) {
+ if (recursive) {
+ std::vector<AzureLocation> target_locations;
+ // Recursive CreateDir calls require there is no file in parents.
+ auto parent = location;
+ while (!parent.path.empty()) {
+ ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client,
parent));
+ if (info.type() == FileType::File) {
+ return NotADir(parent);
+ }
+ if (info.type() == FileType::NotFound) {
+ target_locations.push_back(parent);
+ }
+ parent = parent.parent();
Review Comment:
A better name for the `parent` variable is `prefix` because a "prefix" can
be the entire location whereas starting with `parent = location;` is very weird
because of the choice of variable name.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -1803,37 +1805,63 @@ class AzureFileSystem::Impl {
const AzureLocation& location, bool recursive) {
DCHECK(!location.container.empty());
DCHECK(!location.path.empty());
- // Non-recursive CreateDir calls require the parent directory to exist.
- if (!recursive) {
+ if (recursive) {
+ std::vector<AzureLocation> target_locations;
+ // Recursive CreateDir calls require there is no file in parents.
+ auto parent = location;
+ while (!parent.path.empty()) {
+ ARROW_ASSIGN_OR_RAISE(auto info, GetFileInfo(container_client,
parent));
+ if (info.type() == FileType::File) {
+ return NotADir(parent);
+ }
+ if (info.type() == FileType::NotFound) {
+ target_locations.push_back(parent);
+ }
+ parent = parent.parent();
+ if (parent.path.empty() && info.type() == FileType::NotFound) {
+ // parent is container and may not exist
+ ARROW_ASSIGN_OR_RAISE(info,
+ GetContainerPropsAsFileInfo(parent,
container_client));
+ if (info.type() == FileType::NotFound) {
+ try {
+ container_client.CreateIfNotExists();
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(exception, "Failed to create directory
'",
+ location.all, "': ",
container_client.GetUrl());
+ }
+ }
+ }
+ }
+ for (size_t i = target_locations.size(); i > 0; --i) {
+ const auto& target_location = target_locations[i - 1];
+ try {
+ create_if_not_exists(container_client, target_location);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(exception, "Failed to create directory '",
+ location.all, "': ",
container_client.GetUrl());
Review Comment:
After the first iteration of this loop, an exception shouldn't report a
failure of the entire operation: the full path creates all the prefixes (they
are "implied directories"). All the other functions handle implied directories,
so it should be OK to rely on them.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]