kou commented on code in PR #39904:
URL: https://github.com/apache/arrow/pull/39904#discussion_r1476849199
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -854,6 +888,397 @@ class TestAzureFileSystem : public ::testing::Test {
const auto directory_path = data.RandomDirectoryPath(rng_);
ASSERT_RAISES(IOError, fs()->DeleteDirContents(directory_path, false));
}
+
+ private:
+ using StringMatcher =
+
::testing::PolymorphicMatcher<::testing::internal::HasSubstrMatcher<std::string>>;
+
+ StringMatcher HasDirMoveToSubdirMessage(const std::string& src,
+ const std::string& dest) {
+ return ::testing::HasSubstr("Cannot Move to '" + dest + "' and make '" +
src +
+ "' a sub-directory of itself.");
+ }
+
+ StringMatcher HasCrossContainerNotImplementedMessage(const std::string&
container_name,
+ const std::string&
dest) {
+ return ::testing::HasSubstr("Move of '" + container_name + "' to '" + dest
+
+ "' requires moving data between "
+ "containers, which is not implemented.");
+ }
+
+ StringMatcher HasMissingParentDirMessage(const std::string& dest) {
+ return ::testing::HasSubstr("The parent directory of the destination path
'" + dest +
+ "' does not exist.");
+ }
+
+ /// \brief Expected POSIX semantics for the rename operation on multiple
+ /// scenarios.
+ ///
+ /// If the src doesn't exist, the error is always ENOENT, otherwise we are
+ /// left with the following combinations:
+ ///
+ /// 1. src's type
+ /// a. File
+ /// b. Directory
+ /// 2. dest's existence
+ /// a. NotFound
+ /// b. File
+ /// c. Directory
+ /// - empty
+ /// - non-empty
+ /// 3. src path has a trailing slash (or not)
+ /// 4. dest path has a trailing slash (or not)
+ ///
+ /// Limitations: this function doesn't consider paths so it assumes that the
+ /// paths don't lead requests for moves that would make the source a subdir
of
+ /// the destination.
+ ///
+ /// \param paths_are_equal src and dest paths without trailing slashes are
equal
+ /// \return std::nullopt if success is expected in the scenario or the errno
+ /// if failure is expected.
+ static std::optional<int> RenameSemantics(FileType src_type, bool
src_trailing_slash,
+ FileType dest_type, bool
dest_trailing_slash,
+ bool dest_is_empty_dir = false,
+ bool paths_are_equal = false) {
+ DCHECK(src_type != FileType::Unknown && dest_type != FileType::Unknown);
+ DCHECK(!dest_is_empty_dir || dest_type == FileType::Directory)
+ << "dest_is_empty_dir must imply dest_type == FileType::Directory";
+ switch (src_type) {
+ case FileType::Unknown:
+ break;
+ case FileType::NotFound:
+ return {ENOENT};
+ case FileType::File:
+ switch (dest_type) {
+ case FileType::Unknown:
+ break;
+ case FileType::NotFound:
+ if (src_trailing_slash) {
+ return {ENOTDIR};
+ }
+ if (dest_trailing_slash) {
+ // A slash on the destination path requires that it exists,
+ // so a confirmation that it's a directory can be performed.
+ return {ENOENT};
+ }
+ return {};
+ case FileType::File:
+ if (src_trailing_slash || dest_trailing_slash) {
+ return {ENOTDIR};
+ }
+ // The existing file is replaced successfuly.
+ return {};
+ case FileType::Directory:
+ if (src_trailing_slash) {
+ return {ENOTDIR};
+ }
+ return EISDIR;
+ }
+ break;
+ case FileType::Directory:
+ switch (dest_type) {
+ case FileType::Unknown:
+ break;
+ case FileType::NotFound:
+ // We don't have to care about the slashes when the source is a
directory.
+ return {};
+ case FileType::File:
+ return {ENOTDIR};
+ case FileType::Directory:
+ if (!paths_are_equal && !dest_is_empty_dir) {
+ return {ENOTEMPTY};
+ }
+ return {};
+ }
+ break;
+ }
+ Unreachable("Invalid parameters passed to RenameSemantics");
+ }
+
+ Status CheckExpectedErrno(const std::string& src, const std::string& dest,
+ std::optional<int> expected_errno,
+ const char* expected_errno_name, FileInfo*
out_src_info) {
+ auto the_fs = fs();
+ const bool src_trailing_slash = internal::HasTrailingSlash(src);
+ const bool dest_trailing_slash = internal::HasTrailingSlash(dest);
+ const auto src_path = std::string{internal::RemoveTrailingSlash(src)};
+ const auto dest_path = std::string{internal::RemoveTrailingSlash(dest)};
+ ARROW_ASSIGN_OR_RAISE(*out_src_info, the_fs->GetFileInfo(src_path));
+ ARROW_ASSIGN_OR_RAISE(auto dest_info, the_fs->GetFileInfo(dest_path));
+ bool dest_is_empty_dir = false;
+ if (dest_info.type() == FileType::Directory) {
+ FileSelector select;
+ select.base_dir = dest_path;
+ select.recursive = false;
+ // TODO(felipecrv): investigate why this can't be false
+ select.allow_not_found = true;
+ ARROW_ASSIGN_OR_RAISE(auto dest_contents, the_fs->GetFileInfo(select));
+ if (dest_contents.empty()) {
+ dest_is_empty_dir = true;
+ }
+ }
+ auto paths_are_equal = src_path == dest_path;
+ auto truly_expected_errno =
+ RenameSemantics(out_src_info->type(), src_trailing_slash,
dest_info.type(),
+ dest_trailing_slash, dest_is_empty_dir,
paths_are_equal);
+ if (truly_expected_errno != expected_errno) {
+ if (expected_errno.has_value()) {
+ return Status::Invalid("expected_errno=", expected_errno_name, "=",
+ *expected_errno,
+ " used in ASSERT_MOVE is incorrect. "
+ "POSIX semantics for this scenario require
errno=",
+ strerror(truly_expected_errno.value_or(0)));
+ } else {
+ DCHECK(truly_expected_errno.has_value());
+ return Status::Invalid(
+ "ASSERT_MOVE used to assert success in a scenario for which "
+ "POSIX semantics requires errno=",
+ strerror(*truly_expected_errno));
+ }
+ }
+ return Status::OK();
+ }
+
+ void AssertAfterMove(const std::string& src, const std::string& dest,
FileType type) {
+ if (internal::RemoveTrailingSlash(src) !=
internal::RemoveTrailingSlash(dest)) {
+ AssertFileInfo(fs(), src, FileType::NotFound);
+ }
+ AssertFileInfo(fs(), dest, type);
+ }
+
+ static bool WithErrno(const Status& status, int expected_errno) {
+ auto* detail = status.detail().get();
+ return detail &&
+ arrow::internal::ErrnoFromStatusDetail(*detail).value_or(-1) ==
expected_errno;
+ }
+
+ std::optional<StringMatcher> MoveErrorMessageMatcher(const FileInfo&
src_info,
+ const std::string& src,
+ const std::string& dest,
+ int for_errno) {
+ switch (for_errno) {
+ case ENOENT: {
+ auto& path = src_info.type() == FileType::NotFound ? src : dest;
+ return ::testing::HasSubstr("Path does not exist '" + path + "'");
+ }
+ case ENOTEMPTY:
+ return ::testing::HasSubstr("Directory not empty: '" + dest + "'");
+ }
+ return std::nullopt;
+ }
+
+#define ASSERT_MOVE(src, dest, expected_errno)
\
+ do {
\
+ auto _src = (src);
\
+ auto _dest = (dest);
\
+ std::optional<int> _expected_errno = (expected_errno);
\
+ FileInfo _src_info;
\
+ ASSERT_OK(
\
+ CheckExpectedErrno(_src, _dest, _expected_errno, #expected_errno,
&_src_info)); \
+ auto _move_st = ::arrow::internal::GenericToStatus(fs()->Move(_src,
_dest)); \
+ if (_expected_errno.has_value()) {
\
+ if (WithErrno(_move_st, *_expected_errno)) {
\
+ /* If the Move failed, the source should remain unchanged. */
\
+ AssertFileInfo(fs(), std::string{internal::RemoveTrailingSlash(_src)},
\
+ _src_info.type());
\
+ auto _message_matcher =
\
+ MoveErrorMessageMatcher(_src_info, _src, _dest, *_expected_errno);
\
+ if (_message_matcher.has_value()) {
\
+ EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, *_message_matcher,
_move_st); \
+ } else {
\
+ SUCCEED();
\
+ }
\
+ } else {
\
+ FAIL() << "Move '" ARROW_STRINGIFY(src) "' to '" ARROW_STRINGIFY(dest)
\
+ << "' did not fail with errno=" << #expected_errno;
\
+ }
\
+ } else {
\
+ if (!_move_st.ok()) {
\
+ FAIL() << "Move '" ARROW_STRINGIFY(src) "' to '" ARROW_STRINGIFY(dest)
\
+ << "' failed with " << _move_st.ToString();
\
+ } else {
\
+ AssertAfterMove(_src, _dest, _src_info.type());
\
+ }
\
+ }
\
+ } while (false)
+
+#define ASSERT_MOVE_OK(src, dest) ASSERT_MOVE((src), (dest), std::nullopt)
+
+ // Tests for Move()
+
+ public:
+ void TestRenameContainer() {
+ EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+ auto data = SetUpPreexistingData();
+ // Container exists, so renaming to the same name succeeds because it's a
no-op.
+ ASSERT_MOVE_OK(data.container_name, data.container_name);
+ // Renaming a container that doesn't exist fails.
+ ASSERT_MOVE("missing-container", "missing-container", ENOENT);
+ ASSERT_MOVE("missing-container", data.container_name, ENOENT);
+ // Renaming a container to an existing non-empty container fails.
+ auto non_empty_container = PreexistingData::RandomContainerName(rng_);
+ auto non_empty_container_client = CreateContainer(non_empty_container);
+ CreateBlob(non_empty_container_client, "object1",
PreexistingData::kLoremIpsum);
+ ASSERT_MOVE(data.container_name, non_empty_container, ENOTEMPTY);
+ // Renaming to an empty container fails to replace it
+ auto empty_container = PreexistingData::RandomContainerName(rng_);
+ auto empty_container_client = CreateContainer(empty_container);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ IOError,
+ ::testing::HasSubstr("Unable to replace empty container: '" +
empty_container +
+ "'"),
+ fs()->Move(data.container_name, empty_container));
+ // Renaming to a non-existing container creates it
+ auto new_container = PreexistingData::RandomContainerName(rng_);
+ AssertFileInfo(fs(), new_container, FileType::NotFound);
+ if (env->backend() == AzureBackend::kAzurite) {
+ // Azurite returns a 201 Created for RenameBlobContainer, but the created
+ // container doesn't contain the blobs from the source container and
+ // the source container reamins undeleted after the "rename".
+ } else {
+ // See Azure SDK issue/question:
+ // https://github.com/Azure/azure-sdk-for-cpp/issues/5262
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ IOError,
+ ::testing::HasSubstr("The 'rename' operation is not supported on
containers."),
+ fs()->Move(data.container_name, new_container));
+ // ASSERT_MOVE_OK(data.container_name, new_container);
+ // AssertFileInfo(fs(),
+ // ConcatAbstractPath(new_container,
PreexistingData::kObjectName),
+ // FileType::File);
+ }
+ // Renaming to an empty container can work if the source is also empty
+ auto new_empty_container = PreexistingData::RandomContainerName(rng_);
+ auto new_empty_container_client = CreateContainer(new_empty_container);
+ ASSERT_MOVE_OK(empty_container, new_empty_container);
+ }
+
+ void TestMoveContainerToPath() {
+ auto data = SetUpPreexistingData();
+ ASSERT_MOVE("missing-container", data.ContainerPath("new-subdir"), ENOENT);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid,
+ HasDirMoveToSubdirMessage(data.container_name,
data.ContainerPath("new-subdir")),
+ fs()->Move(data.container_name, data.ContainerPath("new-subdir")));
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ NotImplemented,
+ HasCrossContainerNotImplementedMessage(data.container_name,
+ "a-container/new-subdir"),
+ fs()->Move(data.container_name, "a-container/new-subdir"));
+ }
+
+ void TestCreateContainerFromPath() {
+ auto data = SetUpPreexistingData();
+ auto missing_path = data.RandomDirectoryPath(rng_);
+ ASSERT_MOVE(missing_path, "new-container", ENOENT);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid,
+ ::testing::HasSubstr("Creating files at '/' is not possible, only
directories."),
+ fs()->Move(data.ObjectPath(), "new-file"));
+ auto src_dir_path = data.RandomDirectoryPath(rng_);
+ ASSERT_OK(fs()->CreateDir(src_dir_path, false));
+ EXPECT_OK_AND_ASSIGN(auto src_dir_info, fs()->GetFileInfo(src_dir_path));
+ EXPECT_EQ(src_dir_info.type(), FileType::Directory);
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ NotImplemented,
+ HasCrossContainerNotImplementedMessage(src_dir_path, "new-container"),
+ fs()->Move(src_dir_path, "new-container"));
+ }
+
+ void TestMovePaths() {
+ Status st;
+ auto data = SetUpPreexistingData();
+ // When source doesn't exist.
+ ASSERT_MOVE("missing-container/src-path", "a-container/dest-path", ENOENT);
+ auto missing_path1 = data.RandomDirectoryPath(rng_);
+ ASSERT_MOVE(missing_path1, "missing-container/path", ENOENT);
+
+ // But when source exists...
+ if (!WithHierarchicalNamespace()) {
+ // ...and containers are different, we get an error message telling
cross-container
+ // moves are not implemented.
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ NotImplemented,
+ HasCrossContainerNotImplementedMessage(data.ObjectPath(),
+ "missing-container/path"),
+ fs()->Move(data.ObjectPath(), "missing-container/path"));
+ GTEST_SKIP()
+ << "The rest of TestMovePaths is not implemented for non-HNS
scenarios";
+ }
+ auto adlfs_client =
+ datalake_service_client_->GetFileSystemClient(data.container_name);
+ // ...and dest.container doesn't exist.
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ IOError, HasMissingParentDirMessage("missing-container/path"),
+ fs()->Move(data.ObjectPath(), "missing-container/path"));
+ AssertFileInfo(fs(), data.ObjectPath(), FileType::File);
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ IOError, HasMissingParentDirMessage(data.Path("missing-subdir/file")),
+ fs()->Move(data.ObjectPath(), data.Path("missing-subdir/file")));
+ AssertFileInfo(fs(), data.ObjectPath(), FileType::File);
+
+ // src is a file and dest does not exists
+ ASSERT_MOVE_OK(data.ObjectPath(), data.Path("file0"));
+ ASSERT_MOVE(data.Path("file0/"), data.Path("file1"), ENOTDIR);
+ ASSERT_MOVE(data.Path("file0"), data.Path("file1/"), ENOENT);
+ ASSERT_MOVE(data.Path("file0/"), data.Path("file1/"), ENOTDIR);
+ // "file0" exists
+
+ // src is a file and dest exists (as a file)
+ CreateFile(adlfs_client, PreexistingData::kObjectName,
PreexistingData::kLoremIpsum);
+ CreateFile(adlfs_client, "file1", PreexistingData::kLoremIpsum);
+ ASSERT_MOVE_OK(data.ObjectPath(), data.Path("file0"));
+ ASSERT_MOVE(data.Path("file1/"), data.Path("file0"), ENOTDIR);
+ ASSERT_MOVE(data.Path("file1"), data.Path("file0/"), ENOTDIR);
+ ASSERT_MOVE(data.Path("file1/"), data.Path("file0/"), ENOTDIR);
+ // "file1" and "file2" exist
+
+ // src is a file and dest exists (as an empty dir)
+ CreateDirectory(adlfs_client, "subdir0");
+ ASSERT_MOVE(data.Path("file0"), data.Path("subdir0"), EISDIR);
+ ASSERT_MOVE(data.Path("file0/"), data.Path("subdir0"), ENOTDIR);
+ ASSERT_MOVE(data.Path("file0"), data.Path("subdir0/"), EISDIR);
+ ASSERT_MOVE(data.Path("file0/"), data.Path("subdir0/"), ENOTDIR);
+
+ // src is a file and dest exists (as a non-empty dir)
+ CreateFile(adlfs_client, "subdir0/file-at-subdir");
+ ASSERT_MOVE(data.Path("file0"), data.Path("subdir0"), EISDIR);
+ ASSERT_MOVE(data.Path("file0/"), data.Path("subdir0"), ENOTDIR);
+ ASSERT_MOVE(data.Path("file0"), data.Path("subdir0/"), EISDIR);
+ ASSERT_MOVE(data.Path("file0/"), data.Path("subdir0/"), ENOTDIR);
+ // "subdir0/file-at-subdir" exists
+
+ // src is a directory and dest does not exists
+ CreateDirectory(adlfs_client, "subdir0");
Review Comment:
It seems that `subdir0` created at
https://github.com/apache/arrow/pull/39904/files#diff-7d2cbf9c6abf1ee980b8cf5a79e222543c17b3d93b12c4c0c9fa123b1bb200b6R1238
still exist.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]