This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 46c226e38e GH-38758: [C++][FS][Azure] Rename AzurePath to
AzureLocation (#38773)
46c226e38e is described below
commit 46c226e38e88ca4a30e9ca0f9c63e911a2d645b8
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Nov 19 10:56:48 2023 +0900
GH-38758: [C++][FS][Azure] Rename AzurePath to AzureLocation (#38773)
### Rationale for this change
It's for readability. `AzurePath::path_to_file` may refer a directory but
its name includes `file`.
### What changes are included in this PR?
Rename `AzurePath` to `AzureLocation`:
* `AzurePath::path_to_file` -> `AzureLocation::path`
* `AzurePath::path_to_file_parts` -> `AzureLocation::path_parts`
* `AzurePath::full_path` -> `AzureLocation::all`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
No.
* Closes: #38758
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 245 ++++++++++++++++++------------------
1 file changed, 126 insertions(+), 119 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index fdf119477a..91e6c22255 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -64,86 +64,88 @@ Status AzureOptions::ConfigureAccountKeyCredentials(const
std::string& account_n
namespace {
-// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
-// container and path within that storage account.
-struct AzurePath {
- std::string full_path;
+// An AzureFileSystem represents a single Azure storage
+// account. AzureLocation describes a container and path within
+// that storage account.
+struct AzureLocation {
+ std::string all;
std::string container;
- std::string path_to_file;
- std::vector<std::string> path_to_file_parts;
+ std::string path;
+ std::vector<std::string> path_parts;
- static Result<AzurePath> FromString(const std::string& s) {
+ static Result<AzureLocation> FromString(const std::string& string) {
// Example expected string format: testcontainer/testdir/testfile.txt
// container = testcontainer
- // path_to_file = testdir/testfile.txt
- // path_to_file_parts = [testdir, testfile.txt]
- if (internal::IsLikelyUri(s)) {
+ // path = testdir/testfile.txt
+ // path_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(string)) {
return Status::Invalid(
- "Expected an Azure object path of the form 'container/path...', got
a URI: '",
- s, "'");
+ "Expected an Azure object location of the form 'container/path...',
got a URI: "
+ "'",
+ string, "'");
}
- auto first_sep = s.find_first_of(internal::kSep);
+ auto first_sep = string.find_first_of(internal::kSep);
if (first_sep == 0) {
- return Status::Invalid("Path cannot start with a separator ('", s, "')");
+ return Status::Invalid("Location cannot start with a separator ('",
string, "')");
}
if (first_sep == std::string::npos) {
- return AzurePath{std::string(s), std::string(s), "", {}};
+ return AzureLocation{string, string, "", {}};
}
- AzurePath path;
- path.full_path = std::string(s);
- path.container = std::string(s.substr(0, first_sep));
- path.path_to_file = std::string(s.substr(first_sep + 1));
- path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
- RETURN_NOT_OK(Validate(path));
- return path;
+ AzureLocation location;
+ location.all = string;
+ location.container = string.substr(0, first_sep);
+ location.path = string.substr(first_sep + 1);
+ location.path_parts = internal::SplitAbstractPath(location.path);
+ RETURN_NOT_OK(location.Validate());
+ return location;
}
- static Status Validate(const AzurePath& path) {
- auto status = internal::ValidateAbstractPathParts(path.path_to_file_parts);
- if (!status.ok()) {
- return Status::Invalid(status.message(), " in path ", path.full_path);
- } else {
- return status;
- }
- }
-
- AzurePath parent() const {
+ AzureLocation parent() const {
DCHECK(has_parent());
- auto parent = AzurePath{"", container, "", path_to_file_parts};
- parent.path_to_file_parts.pop_back();
- parent.path_to_file =
internal::JoinAbstractPath(parent.path_to_file_parts);
- if (parent.path_to_file.empty()) {
- parent.full_path = parent.container;
+ AzureLocation parent{"", container, "", path_parts};
+ parent.path_parts.pop_back();
+ parent.path = internal::JoinAbstractPath(parent.path_parts);
+ if (parent.path.empty()) {
+ parent.all = parent.container;
} else {
- parent.full_path = parent.container + internal::kSep +
parent.path_to_file;
+ parent.all = parent.container + internal::kSep + parent.path;
}
return parent;
}
- bool has_parent() const { return !path_to_file.empty(); }
+ bool has_parent() const { return !path.empty(); }
- bool empty() const { return container.empty() && path_to_file.empty(); }
+ bool empty() const { return container.empty() && path.empty(); }
- bool operator==(const AzurePath& other) const {
- return container == other.container && path_to_file == other.path_to_file;
+ bool operator==(const AzureLocation& other) const {
+ return container == other.container && path == other.path;
+ }
+
+ private:
+ Status Validate() {
+ auto status = internal::ValidateAbstractPathParts(path_parts);
+ if (!status.ok()) {
+ return Status::Invalid(status.message(), " in location ", all);
+ } else {
+ return status;
+ }
}
};
-Status PathNotFound(const AzurePath& path) {
- return ::arrow::fs::internal::PathNotFound(path.full_path);
+Status PathNotFound(const AzureLocation& location) {
+ return ::arrow::fs::internal::PathNotFound(location.all);
}
-Status NotAFile(const AzurePath& path) {
- return ::arrow::fs::internal::NotAFile(path.full_path);
+Status NotAFile(const AzureLocation& location) {
+ return ::arrow::fs::internal::NotAFile(location.all);
}
-Status ValidateFilePath(const AzurePath& path) {
- if (path.container.empty()) {
- return PathNotFound(path);
+Status ValidateFileLocation(const AzureLocation& location) {
+ if (location.container.empty()) {
+ return PathNotFound(location);
}
-
- if (path.path_to_file.empty()) {
- return NotAFile(path);
+ if (location.path.empty()) {
+ return NotAFile(location);
}
return Status::OK();
}
@@ -308,10 +310,11 @@ std::shared_ptr<const KeyValueMetadata>
PropertiesToMetadata(
class ObjectInputFile final : public io::RandomAccessFile {
public:
ObjectInputFile(std::shared_ptr<Azure::Storage::Blobs::BlobClient>
blob_client,
- const io::IOContext& io_context, AzurePath path, int64_t
size = kNoSize)
+ const io::IOContext& io_context, AzureLocation location,
+ int64_t size = kNoSize)
: blob_client_(std::move(blob_client)),
io_context_(io_context),
- path_(std::move(path)),
+ location_(std::move(location)),
content_length_(size) {}
Status Init() {
@@ -326,7 +329,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
return Status::OK();
} catch (const Azure::Storage::StorageException& exception) {
if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
- return PathNotFound(path_);
+ return PathNotFound(location_);
}
return internal::ExceptionToStatus(
"GetProperties failed for '" + blob_client_->GetUrl() +
@@ -451,7 +454,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
private:
std::shared_ptr<Azure::Storage::Blobs::BlobClient> blob_client_;
const io::IOContext io_context_;
- AzurePath path_;
+ AzureLocation location_;
bool closed_ = false;
int64_t pos_ = 0;
@@ -488,22 +491,24 @@ class AzureFileSystem::Impl {
const AzureOptions& options() const { return options_; }
public:
- Result<FileInfo> GetFileInfo(const AzurePath& path) {
+ Result<FileInfo> GetFileInfo(const AzureLocation& location) {
FileInfo info;
- info.set_path(path.full_path);
-
- if (path.container.empty()) {
- DCHECK(path.path_to_file.empty()); // The path is invalid if the
container is empty
- // but not path_to_file.
- // path must refer to the root of the Azure storage account. This is a
directory,
- // and there isn't any extra metadata to fetch.
+ info.set_path(location.all);
+
+ if (location.container.empty()) {
+ // The location is invalid if the container is empty but not
+ // path.
+ DCHECK(location.path.empty());
+ // The location must refer to the root of the Azure storage
+ // account. This is a directory, and there isn't any extra
+ // metadata to fetch.
info.set_type(FileType::Directory);
return info;
}
- if (path.path_to_file.empty()) {
- // path refers to a container. This is a directory if it exists.
+ if (location.path.empty()) {
+ // The location refers to a container. This is a directory if it exists.
auto container_client =
- blob_service_client_->GetBlobContainerClient(path.container);
+ blob_service_client_->GetBlobContainerClient(location.container);
try {
auto properties = container_client.GetProperties();
info.set_type(FileType::Directory);
@@ -522,13 +527,13 @@ class AzureFileSystem::Impl {
exception);
}
}
- auto file_client =
datalake_service_client_->GetFileSystemClient(path.container)
- .GetFileClient(path.path_to_file);
+ auto file_client =
datalake_service_client_->GetFileSystemClient(location.container)
+ .GetFileClient(location.path);
try {
auto properties = file_client.GetProperties();
if (properties.Value.IsDirectory) {
info.set_type(FileType::Directory);
- } else if (internal::HasTrailingSlash(path.path_to_file)) {
+ } else if (internal::HasTrailingSlash(location.path)) {
// For a path with a trailing slash a hierarchical namespace may
return a blob
// with that trailing slash removed. For consistency with flat
namespace and
// other filesystems we chose to return NotFound.
@@ -544,7 +549,7 @@ class AzureFileSystem::Impl {
} catch (const Azure::Storage::StorageException& exception) {
if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(path.container));
+
hierarchical_namespace_.Enabled(location.container));
if (hierarchical_namespace_enabled) {
// If the hierarchical namespace is enabled, then the storage
account will have
// explicit directories. Neither a file nor a directory was found.
@@ -557,7 +562,7 @@ class AzureFileSystem::Impl {
// If listing the prefix `path.path_to_file` with trailing slash
returns at least
// one result then `path` refers to an implied directory.
- auto prefix = internal::EnsureTrailingSlash(path.path_to_file);
+ auto prefix = internal::EnsureTrailingSlash(location.path);
list_blob_options.Prefix = prefix;
// We only need to know if there is at least one result, so minimise
page size
// for efficiency.
@@ -565,7 +570,7 @@ class AzureFileSystem::Impl {
try {
auto paged_list_result =
- blob_service_client_->GetBlobContainerClient(path.container)
+ blob_service_client_->GetBlobContainerClient(location.container)
.ListBlobs(list_blob_options);
if (paged_list_result.Blobs.size() > 0) {
info.set_type(FileType::Directory);
@@ -589,17 +594,16 @@ class AzureFileSystem::Impl {
}
}
- Result<std::shared_ptr<ObjectInputFile>> OpenInputFile(const std::string& s,
+ Result<std::shared_ptr<ObjectInputFile>> OpenInputFile(const AzureLocation&
location,
AzureFileSystem* fs) {
- ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(s));
- ARROW_ASSIGN_OR_RAISE(auto path, AzurePath::FromString(s));
- RETURN_NOT_OK(ValidateFilePath(path));
+ RETURN_NOT_OK(ValidateFileLocation(location));
+ ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path));
auto blob_client = std::make_shared<Azure::Storage::Blobs::BlobClient>(
- blob_service_client_->GetBlobContainerClient(path.container)
- .GetBlobClient(path.path_to_file));
+ blob_service_client_->GetBlobContainerClient(location.container)
+ .GetBlobClient(location.path));
- auto ptr =
- std::make_shared<ObjectInputFile>(blob_client, fs->io_context(),
std::move(path));
+ auto ptr = std::make_shared<ObjectInputFile>(blob_client, fs->io_context(),
+ std::move(location));
RETURN_NOT_OK(ptr->Init());
return ptr;
}
@@ -613,26 +617,26 @@ class AzureFileSystem::Impl {
if (info.type() != FileType::File && info.type() != FileType::Unknown) {
return ::arrow::fs::internal::NotAFile(info.path());
}
- ARROW_ASSIGN_OR_RAISE(auto path, AzurePath::FromString(info.path()));
- RETURN_NOT_OK(ValidateFilePath(path));
+ ARROW_ASSIGN_OR_RAISE(auto location,
AzureLocation::FromString(info.path()));
+ RETURN_NOT_OK(ValidateFileLocation(location));
auto blob_client = std::make_shared<Azure::Storage::Blobs::BlobClient>(
- blob_service_client_->GetBlobContainerClient(path.container)
- .GetBlobClient(path.path_to_file));
+ blob_service_client_->GetBlobContainerClient(location.container)
+ .GetBlobClient(location.path));
auto ptr = std::make_shared<ObjectInputFile>(blob_client, fs->io_context(),
- std::move(path), info.size());
+ std::move(location),
info.size());
RETURN_NOT_OK(ptr->Init());
return ptr;
}
- Status CreateDir(const AzurePath& path) {
- if (path.container.empty()) {
+ Status CreateDir(const AzureLocation& location) {
+ if (location.container.empty()) {
return Status::Invalid("Cannot create an empty container");
}
- if (path.path_to_file.empty()) {
+ if (location.path.empty()) {
auto container_client =
- blob_service_client_->GetBlobContainerClient(path.container);
+ blob_service_client_->GetBlobContainerClient(location.container);
try {
auto response = container_client.Create();
if (response.Value.Created) {
@@ -640,18 +644,18 @@ class AzureFileSystem::Impl {
} else {
return StatusFromErrorResponse(
container_client.GetUrl(), response.RawResponse.get(),
- "Failed to create a container: " + path.container);
+ "Failed to create a container: " + location.container);
}
} catch (const Azure::Storage::StorageException& exception) {
return internal::ExceptionToStatus(
- "Failed to create a container: " + path.container + ": " +
+ "Failed to create a container: " + location.container + ": " +
container_client.GetUrl(),
exception);
}
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(path.container));
+ hierarchical_namespace_.Enabled(location.container));
if (!hierarchical_namespace_enabled) {
// Without hierarchical namespace enabled Azure blob storage has no
directories.
// Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
@@ -659,58 +663,59 @@ class AzureFileSystem::Impl {
return Status::OK();
}
- auto directory_client =
datalake_service_client_->GetFileSystemClient(path.container)
- .GetDirectoryClient(path.path_to_file);
+ auto directory_client =
+ datalake_service_client_->GetFileSystemClient(location.container)
+ .GetDirectoryClient(location.path);
try {
auto response = directory_client.Create();
if (response.Value.Created) {
return Status::OK();
} else {
- return StatusFromErrorResponse(
- directory_client.GetUrl(), response.RawResponse.get(),
- "Failed to create a directory: " + path.path_to_file);
+ return StatusFromErrorResponse(directory_client.GetUrl(),
+ response.RawResponse.get(),
+ "Failed to create a directory: " +
location.path);
}
} catch (const Azure::Storage::StorageException& exception) {
return internal::ExceptionToStatus(
- "Failed to create a directory: " + path.path_to_file + ": " +
+ "Failed to create a directory: " + location.path + ": " +
directory_client.GetUrl(),
exception);
}
}
- Status CreateDirRecursive(const AzurePath& path) {
- if (path.container.empty()) {
+ Status CreateDirRecursive(const AzureLocation& location) {
+ if (location.container.empty()) {
return Status::Invalid("Cannot create an empty container");
}
- auto container_client =
blob_service_client_->GetBlobContainerClient(path.container);
+ auto container_client =
+ blob_service_client_->GetBlobContainerClient(location.container);
try {
container_client.CreateIfNotExists();
} catch (const Azure::Storage::StorageException& exception) {
return internal::ExceptionToStatus(
- "Failed to create a container: " + path.container + " (" +
+ "Failed to create a container: " + location.container + " (" +
container_client.GetUrl() + ")",
exception);
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(path.container));
+ hierarchical_namespace_.Enabled(location.container));
if (!hierarchical_namespace_enabled) {
- // We can't create a directory without hierarchical namespace
- // support. There is only "virtual directory" without
- // hierarchical namespace support. And a "virtual directory" is
- // (virtually) created a blob with ".../.../blob" blob name
- // automatically.
+ // Without hierarchical namespace enabled Azure blob storage has no
directories.
+ // Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
+ // in the name implies directories.
return Status::OK();
}
- auto directory_client =
datalake_service_client_->GetFileSystemClient(path.container)
- .GetDirectoryClient(path.path_to_file);
+ auto directory_client =
+ datalake_service_client_->GetFileSystemClient(location.container)
+ .GetDirectoryClient(location.path);
try {
directory_client.CreateIfNotExists();
} catch (const Azure::Storage::StorageException& exception) {
return internal::ExceptionToStatus(
- "Failed to create a directory: " + path.path_to_file + " (" +
+ "Failed to create a directory: " + location.path + " (" +
directory_client.GetUrl() + ")",
exception);
}
@@ -733,8 +738,8 @@ bool AzureFileSystem::Equals(const FileSystem& other) const
{
}
Result<FileInfo> AzureFileSystem::GetFileInfo(const std::string& path) {
- ARROW_ASSIGN_OR_RAISE(auto p, AzurePath::FromString(path));
- return impl_->GetFileInfo(p);
+ ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path));
+ return impl_->GetFileInfo(location);
}
Result<FileInfoVector> AzureFileSystem::GetFileInfo(const FileSelector&
select) {
@@ -742,11 +747,11 @@ Result<FileInfoVector> AzureFileSystem::GetFileInfo(const
FileSelector& select)
}
Status AzureFileSystem::CreateDir(const std::string& path, bool recursive) {
- ARROW_ASSIGN_OR_RAISE(auto p, AzurePath::FromString(path));
+ ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path));
if (recursive) {
- return impl_->CreateDirRecursive(p);
+ return impl_->CreateDirRecursive(location);
} else {
- return impl_->CreateDir(p);
+ return impl_->CreateDir(location);
}
}
@@ -776,7 +781,8 @@ Status AzureFileSystem::CopyFile(const std::string& src,
const std::string& dest
Result<std::shared_ptr<io::InputStream>> AzureFileSystem::OpenInputStream(
const std::string& path) {
- return impl_->OpenInputFile(path, this);
+ ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path));
+ return impl_->OpenInputFile(location, this);
}
Result<std::shared_ptr<io::InputStream>> AzureFileSystem::OpenInputStream(
@@ -786,7 +792,8 @@ Result<std::shared_ptr<io::InputStream>>
AzureFileSystem::OpenInputStream(
Result<std::shared_ptr<io::RandomAccessFile>> AzureFileSystem::OpenInputFile(
const std::string& path) {
- return impl_->OpenInputFile(path, this);
+ ARROW_ASSIGN_OR_RAISE(auto location, AzureLocation::FromString(path));
+ return impl_->OpenInputFile(location, this);
}
Result<std::shared_ptr<io::RandomAccessFile>> AzureFileSystem::OpenInputFile(