This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4e58f7ca00 GH-39119: [C++] Refactor the Azure FS tests and filesystem
class instantiation (#39207)
4e58f7ca00 is described below
commit 4e58f7ca0016c2b2d8a859a0c5965df3b15523e0
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Thu Dec 14 15:25:28 2023 -0300
GH-39119: [C++] Refactor the Azure FS tests and filesystem class
instantiation (#39207)
### Rationale for this change
This PR contains some unrelated improvements (like better docs) and some
nitpicky fixes. The test refactoring makes it easier to see on which
environments tests run and make them able to be instantiated with different
options in the future once we extend the `AzureOptions`.
### What changes are included in this PR?
- Random cleanups
- Short namespace aliases
- Refactoring of the tests (multiple environments, TYPED_TEST_SUITE,
explicit preexisting data setup)
- Cleanup of the `AzureOptions` class
### Are these changes tested?
Yes. I created Azure Storage accounts to test with and without Hierarchical
Namespace support. I also ran the tests in a shell without my environment
variables to ensure the test-skipping behavior is correct.
### Are there any user-facing changes?
Changes to `AzureOptions`, but since `AzureFileSystem` is not really used
yet, these breaking changes won't be a problem.
* Closes: #39119
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 559 ++++++------
cpp/src/arrow/filesystem/azurefs.h | 150 +--
cpp/src/arrow/filesystem/azurefs_internal.cc | 6 +
cpp/src/arrow/filesystem/azurefs_internal.h | 3 -
cpp/src/arrow/filesystem/azurefs_test.cc | 1269 ++++++++++++++------------
5 files changed, 1094 insertions(+), 893 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index 824a8fb531..2178853640 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -33,42 +33,85 @@
#include "arrow/util/logging.h"
#include "arrow/util/string.h"
-namespace arrow {
-namespace fs {
+namespace arrow::fs {
+
+namespace Blobs = Azure::Storage::Blobs;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
+namespace Http = Azure::Core::Http;
+namespace Storage = Azure::Storage;
// -----------------------------------------------------------------------
// AzureOptions Implementation
AzureOptions::AzureOptions() = default;
+AzureOptions::~AzureOptions() = default;
+
bool AzureOptions::Equals(const AzureOptions& other) const {
- return (account_dfs_url == other.account_dfs_url &&
- account_blob_url == other.account_blob_url &&
- credentials_kind == other.credentials_kind &&
- default_metadata == other.default_metadata);
+ // TODO(GH-38598): update here when more auth methods are added.
+ const bool equals = backend == other.backend &&
+ default_metadata == other.default_metadata &&
+ account_blob_url_ == other.account_blob_url_ &&
+ account_dfs_url_ == other.account_dfs_url_ &&
+ credential_kind_ == other.credential_kind_;
+ if (!equals) {
+ return false;
+ }
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ return true;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return storage_shared_key_credential_->AccountName ==
+ other.storage_shared_key_credential_->AccountName;
+ }
+ DCHECK(false);
+ return false;
}
-Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
- const std::string&
account_key) {
- if (this->backend == AzureBackend::Azurite) {
- account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
- account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+Status AzureOptions::ConfigureAccountKeyCredential(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::kAzurite) {
+ account_blob_url_ = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url_ = "http://127.0.0.1:10000/" + account_name + "/";
} else {
- account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
- account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ account_dfs_url_ = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url_ = "https://" + account_name + ".blob.core.windows.net/";
}
- storage_credentials_provider =
-
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
-
account_key);
- credentials_kind = AzureCredentialsKind::StorageCredentials;
+ credential_kind_ = CredentialKind::kStorageSharedKeyCredential;
+ storage_shared_key_credential_ =
+ std::make_shared<Storage::StorageSharedKeyCredential>(account_name,
account_key);
return Status::OK();
}
+Result<std::unique_ptr<Blobs::BlobServiceClient>>
AzureOptions::MakeBlobServiceClient()
+ const {
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ break;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return std::make_unique<Blobs::BlobServiceClient>(account_blob_url_,
+
storage_shared_key_credential_);
+ }
+ return Status::Invalid("AzureOptions doesn't contain a valid auth
configuration");
+}
+
+Result<std::unique_ptr<DataLake::DataLakeServiceClient>>
+AzureOptions::MakeDataLakeServiceClient() const {
+ switch (credential_kind_) {
+ case CredentialKind::kAnonymous:
+ break;
+ case CredentialKind::kStorageSharedKeyCredential:
+ return std::make_unique<DataLake::DataLakeServiceClient>(
+ account_dfs_url_, storage_shared_key_credential_);
+ }
+ return Status::Invalid("AzureOptions doesn't contain a valid auth
configuration");
+}
+
namespace {
-// An AzureFileSystem represents a single Azure storage
-// account. AzureLocation describes a container and path within
-// that storage account.
+// An AzureFileSystem represents an Azure storage account. An AzureLocation
describes a
+// container in that storage account and a path within that container.
struct AzureLocation {
std::string all;
std::string container;
@@ -82,8 +125,8 @@ struct AzureLocation {
// path_parts = [testdir, testfile.txt]
if (internal::IsLikelyUri(string)) {
return Status::Invalid(
- "Expected an Azure object location of the form 'container/path...',
got a URI: "
- "'",
+ "Expected an Azure object location of the form 'container/path...',"
+ " got a URI: '",
string, "'");
}
auto first_sep = string.find_first_of(internal::kSep);
@@ -130,14 +173,15 @@ struct AzureLocation {
private:
Status Validate() {
auto status = internal::ValidateAbstractPathParts(path_parts);
- if (!status.ok()) {
- return Status::Invalid(status.message(), " in location ", all);
- } else {
- return status;
- }
+ return status.ok() ? status : Status::Invalid(status.message(), " in
location ", all);
}
};
+Status ExceptionToStatus(const std::string& prefix,
+ const Azure::Storage::StorageException& exception) {
+ return Status::IOError(prefix, " Azure Error: ", exception.what());
+}
+
Status PathNotFound(const AzureLocation& location) {
return ::arrow::fs::internal::PathNotFound(location.all);
}
@@ -153,23 +197,41 @@ Status ValidateFileLocation(const AzureLocation&
location) {
if (location.path.empty()) {
return NotAFile(location);
}
- ARROW_RETURN_NOT_OK(internal::AssertNoTrailingSlash(location.path));
- return Status::OK();
+ return internal::AssertNoTrailingSlash(location.path);
+}
+
+std::string_view BodyTextView(const Http::RawResponse& raw_response) {
+ const auto& body = raw_response.GetBody();
+#ifndef NDEBUG
+ auto& headers = raw_response.GetHeaders();
+ auto content_type = headers.find("Content-Type");
+ if (content_type != headers.end()) {
+ DCHECK_EQ(std::string_view{content_type->second}.substr(5), "text/");
+ }
+#endif
+ return std::string_view{reinterpret_cast<const char*>(body.data()),
body.size()};
}
Status StatusFromErrorResponse(const std::string& url,
- Azure::Core::Http::RawResponse* raw_response,
+ const Http::RawResponse& raw_response,
const std::string& context) {
- const auto& body = raw_response->GetBody();
// There isn't an Azure specification that response body on error
// doesn't contain any binary data but we assume it. We hope that
// error response body has useful information for the error.
- std::string_view body_text(reinterpret_cast<const char*>(body.data()),
body.size());
- return Status::IOError(context, ": ", url, ": ",
raw_response->GetReasonPhrase(), " (",
- static_cast<int>(raw_response->GetStatusCode()),
+ auto body_text = BodyTextView(raw_response);
+ return Status::IOError(context, ": ", url, ": ",
raw_response.GetReasonPhrase(), " (",
+ static_cast<int>(raw_response.GetStatusCode()),
"): ", body_text);
}
+bool IsContainerNotFound(const Storage::StorageException& exception) {
+ if (exception.ErrorCode == "ContainerNotFound") {
+ DCHECK_EQ(exception.StatusCode, Http::HttpStatusCode::NotFound);
+ return true;
+ }
+ return false;
+}
+
template <typename ArrowType>
std::string FormatValue(typename TypeTraits<ArrowType>::CType value) {
struct StringAppender {
@@ -185,7 +247,7 @@ std::string FormatValue(typename
TypeTraits<ArrowType>::CType value) {
}
std::shared_ptr<const KeyValueMetadata> PropertiesToMetadata(
- const Azure::Storage::Blobs::Models::BlobProperties& properties) {
+ const Blobs::Models::BlobProperties& properties) {
auto metadata = std::make_shared<KeyValueMetadata>();
// Not supported yet:
// * properties.ObjectReplicationSourceProperties
@@ -316,7 +378,7 @@ std::shared_ptr<const KeyValueMetadata>
PropertiesToMetadata(
class ObjectInputFile final : public io::RandomAccessFile {
public:
- ObjectInputFile(std::shared_ptr<Azure::Storage::Blobs::BlobClient>
blob_client,
+ ObjectInputFile(std::shared_ptr<Blobs::BlobClient> blob_client,
const io::IOContext& io_context, AzureLocation location,
int64_t size = kNoSize)
: blob_client_(std::move(blob_client)),
@@ -334,11 +396,11 @@ class ObjectInputFile final : public io::RandomAccessFile
{
content_length_ = properties.Value.BlobSize;
metadata_ = PropertiesToMetadata(properties.Value);
return Status::OK();
- } catch (const Azure::Storage::StorageException& exception) {
- if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
+ } catch (const Storage::StorageException& exception) {
+ if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
return PathNotFound(location_);
}
- return internal::ExceptionToStatus(
+ return ExceptionToStatus(
"GetProperties failed for '" + blob_client_->GetUrl() +
"' with an unexpected Azure error. Cannot initialise an
ObjectInputFile "
"without knowing the file size.",
@@ -411,20 +473,20 @@ class ObjectInputFile final : public io::RandomAccessFile
{
}
// Read the desired range of bytes
- Azure::Core::Http::HttpRange range{position, nbytes};
- Azure::Storage::Blobs::DownloadBlobToOptions download_options;
+ Http::HttpRange range{position, nbytes};
+ Storage::Blobs::DownloadBlobToOptions download_options;
download_options.Range = range;
try {
return blob_client_
->DownloadTo(reinterpret_cast<uint8_t*>(out), nbytes,
download_options)
.Value.ContentRange.Length.Value();
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus("DownloadTo from '" +
blob_client_->GetUrl() +
- "' at position " +
std::to_string(position) +
- " for " + std::to_string(nbytes) +
- " bytes failed with an Azure
error. ReadAt "
- "failed to read the required byte
range.",
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("DownloadTo from '" + blob_client_->GetUrl() +
+ "' at position " + std::to_string(position)
+ " for " +
+ std::to_string(nbytes) +
+ " bytes failed with an Azure error. ReadAt "
+ "failed to read the required byte range.",
+ exception);
}
}
@@ -459,7 +521,7 @@ class ObjectInputFile final : public io::RandomAccessFile {
}
private:
- std::shared_ptr<Azure::Storage::Blobs::BlobClient> blob_client_;
+ std::shared_ptr<Blobs::BlobClient> blob_client_;
const io::IOContext io_context_;
AzureLocation location_;
@@ -469,12 +531,11 @@ class ObjectInputFile final : public io::RandomAccessFile
{
std::shared_ptr<const KeyValueMetadata> metadata_;
};
-Status CreateEmptyBlockBlob(
- std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client)
{
+Status CreateEmptyBlockBlob(std::shared_ptr<Blobs::BlockBlobClient>
block_blob_client) {
try {
block_blob_client->UploadFrom(nullptr, 0);
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"UploadFrom failed for '" + block_blob_client->GetUrl() +
"' with an unexpected Azure error. There is no existing blob at
this "
"location or the existing blob must be replaced so
ObjectAppendStream must "
@@ -484,12 +545,12 @@ Status CreateEmptyBlockBlob(
return Status::OK();
}
-Result<Azure::Storage::Blobs::Models::GetBlockListResult> GetBlockList(
- std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client)
{
+Result<Blobs::Models::GetBlockListResult> GetBlockList(
+ std::shared_ptr<Blobs::BlockBlobClient> block_blob_client) {
try {
return block_blob_client->GetBlockList().Value;
- } catch (Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (Storage::StorageException& exception) {
+ return ExceptionToStatus(
"GetBlockList failed for '" + block_blob_client->GetUrl() +
"' with an unexpected Azure error. Cannot write to a file without
first "
"fetching the existing block list.",
@@ -497,19 +558,19 @@ Result<Azure::Storage::Blobs::Models::GetBlockListResult>
GetBlockList(
}
}
-Azure::Storage::Metadata ArrowMetadataToAzureMetadata(
+Storage::Metadata ArrowMetadataToAzureMetadata(
const std::shared_ptr<const KeyValueMetadata>& arrow_metadata) {
- Azure::Storage::Metadata azure_metadata;
+ Storage::Metadata azure_metadata;
for (auto key_value : arrow_metadata->sorted_pairs()) {
azure_metadata[key_value.first] = key_value.second;
}
return azure_metadata;
}
-Status CommitBlockList(
- std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client,
- const std::vector<std::string>& block_ids, const Azure::Storage::Metadata&
metadata) {
- Azure::Storage::Blobs::CommitBlockListOptions options;
+Status CommitBlockList(std::shared_ptr<Storage::Blobs::BlockBlobClient>
block_blob_client,
+ const std::vector<std::string>& block_ids,
+ const Storage::Metadata& metadata) {
+ Blobs::CommitBlockListOptions options;
options.Metadata = metadata;
try {
// CommitBlockList puts all block_ids in the latest element. That means in
the case of
@@ -517,8 +578,8 @@ Status CommitBlockList(
// previously committed blocks.
//
https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id#request-body
block_blob_client->CommitBlockList(block_ids, options);
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"CommitBlockList failed for '" + block_blob_client->GetUrl() +
"' with an unexpected Azure error. Committing is required to flush
an "
"output/append stream.",
@@ -529,11 +590,10 @@ Status CommitBlockList(
class ObjectAppendStream final : public io::OutputStream {
public:
- ObjectAppendStream(
- std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient>
block_blob_client,
- const io::IOContext& io_context, const AzureLocation& location,
- const std::shared_ptr<const KeyValueMetadata>& metadata,
- const AzureOptions& options, int64_t size = kNoSize)
+ ObjectAppendStream(std::shared_ptr<Blobs::BlockBlobClient> block_blob_client,
+ const io::IOContext& io_context, const AzureLocation&
location,
+ const std::shared_ptr<const KeyValueMetadata>& metadata,
+ const AzureOptions& options, int64_t size = kNoSize)
: block_blob_client_(std::move(block_blob_client)),
io_context_(io_context),
location_(location),
@@ -560,11 +620,11 @@ class ObjectAppendStream final : public io::OutputStream {
auto properties = block_blob_client_->GetProperties();
content_length_ = properties.Value.BlobSize;
pos_ = content_length_;
- } catch (const Azure::Storage::StorageException& exception) {
- if (exception.StatusCode ==
Azure::Core::Http::HttpStatusCode::NotFound) {
+ } catch (const Storage::StorageException& exception) {
+ if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
RETURN_NOT_OK(CreateEmptyBlockBlob(block_blob_client_));
} else {
- return internal::ExceptionToStatus(
+ return ExceptionToStatus(
"GetProperties failed for '" + block_blob_client_->GetUrl() +
"' with an unexpected Azure error. Cannot initialise an "
"ObjectAppendStream without knowing whether a file already
exists at "
@@ -634,7 +694,7 @@ class ObjectAppendStream final : public io::OutputStream {
std::shared_ptr<Buffer> owned_buffer = nullptr) {
RETURN_NOT_OK(CheckClosed("append"));
auto append_data = reinterpret_cast<const uint8_t*>(data);
- Azure::Core::IO::MemoryBodyStream block_content(append_data, nbytes);
+ Core::IO::MemoryBodyStream block_content(append_data, nbytes);
if (block_content.Length() == 0) {
return Status::OK();
}
@@ -657,13 +717,13 @@ class ObjectAppendStream final : public io::OutputStream {
// if the blob was previously created with one block, with id
`00001-arrow` then the
// next block we append will conflict with that, and cause corruption.
new_block_id += "-arrow";
- new_block_id = Azure::Core::Convert::Base64Encode(
+ new_block_id = Core::Convert::Base64Encode(
std::vector<uint8_t>(new_block_id.begin(), new_block_id.end()));
try {
block_blob_client_->StageBlock(new_block_id, block_content);
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"StageBlock failed for '" + block_blob_client_->GetUrl() + "'
new_block_id: '" +
new_block_id +
"' with an unexpected Azure error. Staging new blocks is
fundamental to "
@@ -676,7 +736,7 @@ class ObjectAppendStream final : public io::OutputStream {
return Status::OK();
}
- std::shared_ptr<Azure::Storage::Blobs::BlockBlobClient> block_blob_client_;
+ std::shared_ptr<Blobs::BlockBlobClient> block_blob_client_;
const io::IOContext io_context_;
const AzureLocation location_;
@@ -684,7 +744,7 @@ class ObjectAppendStream final : public io::OutputStream {
int64_t pos_ = 0;
int64_t content_length_ = kNoSize;
std::vector<std::string> block_ids_;
- Azure::Storage::Metadata metadata_;
+ Storage::Metadata metadata_;
};
} // namespace
@@ -693,27 +753,31 @@ class ObjectAppendStream final : public io::OutputStream {
// AzureFilesystem Implementation
class AzureFileSystem::Impl {
- public:
+ private:
io::IOContext io_context_;
- std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>
- datalake_service_client_;
- std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>
blob_service_client_;
AzureOptions options_;
- internal::HierarchicalNamespaceDetector hierarchical_namespace_;
- explicit Impl(AzureOptions options, io::IOContext io_context)
- : io_context_(io_context), options_(std::move(options)) {}
+ std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
+ std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
+ internal::HierarchicalNamespaceDetector hns_detector_;
- Status Init() {
- blob_service_client_ =
std::make_unique<Azure::Storage::Blobs::BlobServiceClient>(
- options_.account_blob_url, options_.storage_credentials_provider);
- datalake_service_client_ =
-
std::make_unique<Azure::Storage::Files::DataLake::DataLakeServiceClient>(
- options_.account_dfs_url, options_.storage_credentials_provider);
-
RETURN_NOT_OK(hierarchical_namespace_.Init(datalake_service_client_.get()));
- return Status::OK();
- }
+ Impl(AzureOptions options, io::IOContext io_context)
+ : io_context_(std::move(io_context)), options_(std::move(options)) {}
+ public:
+ static Result<std::unique_ptr<AzureFileSystem::Impl>> Make(AzureOptions
options,
+ io::IOContext
io_context) {
+ auto self = std::unique_ptr<AzureFileSystem::Impl>(
+ new AzureFileSystem::Impl(std::move(options), std::move(io_context)));
+ ARROW_ASSIGN_OR_RAISE(self->blob_service_client_,
+ self->options_.MakeBlobServiceClient());
+ ARROW_ASSIGN_OR_RAISE(self->datalake_service_client_,
+ self->options_.MakeDataLakeServiceClient());
+
RETURN_NOT_OK(self->hns_detector_.Init(self->datalake_service_client_.get()));
+ return self;
+ }
+
+ io::IOContext& io_context() { return io_context_; }
const AzureOptions& options() const { return options_; }
public:
@@ -722,12 +786,10 @@ class AzureFileSystem::Impl {
info.set_path(location.all);
if (location.container.empty()) {
- // The location is invalid if the container is empty but not
- // path.
+ // The location is invalid if the container is empty but the path is not.
DCHECK(location.path.empty());
- // The location must refer to the root of the Azure storage
- // account. This is a directory, and there isn't any extra
- // metadata to fetch.
+ // This location must be derived from the root path. FileInfo should
describe it
+ // as a directory and there isn't any extra metadata to fetch.
info.set_type(FileType::Directory);
return info;
}
@@ -739,20 +801,22 @@ class AzureFileSystem::Impl {
auto properties = container_client.GetProperties();
info.set_type(FileType::Directory);
info.set_mtime(
-
std::chrono::system_clock::time_point(properties.Value.LastModified));
+
std::chrono::system_clock::time_point{properties.Value.LastModified});
return info;
- } catch (const Azure::Storage::StorageException& exception) {
- if (exception.StatusCode ==
Azure::Core::Http::HttpStatusCode::NotFound) {
+ } catch (const Storage::StorageException& exception) {
+ if (IsContainerNotFound(exception)) {
info.set_type(FileType::NotFound);
return info;
}
- return internal::ExceptionToStatus(
+ return ExceptionToStatus(
"GetProperties for '" + container_client.GetUrl() +
"' failed with an unexpected Azure error. GetFileInfo is
unable to "
"determine whether the container exists.",
exception);
}
}
+
+ // There is a path to search within the container.
auto file_client =
datalake_service_client_->GetFileSystemClient(location.container)
.GetFileClient(location.path);
try {
@@ -763,6 +827,8 @@ class AzureFileSystem::Impl {
// For a path with a trailing slash a hierarchical namespace may
return a blob
// with that trailing slash removed. For consistency with flat
namespace and
// other filesystems we chose to return NotFound.
+ //
+ // NOTE(felipecrv): could this be an empty directory marker?
info.set_type(FileType::NotFound);
return info;
} else {
@@ -770,12 +836,12 @@ class AzureFileSystem::Impl {
info.set_size(properties.Value.FileSize);
}
info.set_mtime(
-
std::chrono::system_clock::time_point(properties.Value.LastModified));
+
std::chrono::system_clock::time_point{properties.Value.LastModified});
return info;
- } catch (const Azure::Storage::StorageException& exception) {
- if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
+ } catch (const Storage::StorageException& exception) {
+ if (exception.StatusCode == Http::HttpStatusCode::NotFound) {
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
-
hierarchical_namespace_.Enabled(location.container));
+ hns_detector_.Enabled(location.container));
if (hierarchical_namespace_enabled) {
// If the hierarchical namespace is enabled, then the storage
account will have
// explicit directories. Neither a file nor a directory was found.
@@ -784,12 +850,10 @@ class AzureFileSystem::Impl {
}
// On flat namespace accounts there are no real directories.
Directories are only
// implied by using `/` in the blob name.
- Azure::Storage::Blobs::ListBlobsOptions list_blob_options;
-
+ Blobs::ListBlobsOptions list_blob_options;
// If listing the prefix `path.path_to_file` with trailing slash
returns at least
// one result then `path` refers to an implied directory.
- auto prefix = internal::EnsureTrailingSlash(location.path);
- list_blob_options.Prefix = prefix;
+ list_blob_options.Prefix =
internal::EnsureTrailingSlash(location.path);
// We only need to know if there is at least one result, so minimise
page size
// for efficiency.
list_blob_options.PageSizeHint = 1;
@@ -798,21 +862,19 @@ class AzureFileSystem::Impl {
auto paged_list_result =
blob_service_client_->GetBlobContainerClient(location.container)
.ListBlobs(list_blob_options);
- if (paged_list_result.Blobs.size() > 0) {
- info.set_type(FileType::Directory);
- } else {
- info.set_type(FileType::NotFound);
- }
+ auto file_type = paged_list_result.Blobs.size() > 0 ?
FileType::Directory
+ :
FileType::NotFound;
+ info.set_type(file_type);
return info;
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "ListBlobs for '" + prefix +
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
+ "ListBlobs for '" + *list_blob_options.Prefix +
"' failed with an unexpected Azure error. GetFileInfo is
unable to "
"determine whether the path should be considered an implied
directory.",
exception);
}
}
- return internal::ExceptionToStatus(
+ return ExceptionToStatus(
"GetProperties for '" + file_client.GetUrl() +
"' failed with an unexpected "
"Azure error. GetFileInfo is unable to determine whether the
path exists.",
@@ -822,9 +884,8 @@ class AzureFileSystem::Impl {
private:
template <typename OnContainer>
- Status VisitContainers(const Azure::Core::Context& context,
- OnContainer&& on_container) const {
- Azure::Storage::Blobs::ListBlobContainersOptions options;
+ Status VisitContainers(const Core::Context& context, OnContainer&&
on_container) const {
+ Blobs::ListBlobContainersOptions options;
try {
auto container_list_response =
blob_service_client_->ListBlobContainers(options, context);
@@ -834,14 +895,14 @@ class AzureFileSystem::Impl {
RETURN_NOT_OK(on_container(container));
}
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus("Failed to list account containers.",
exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to list account containers.",
exception);
}
return Status::OK();
}
- static FileInfo FileInfoFromBlob(const std::string& container,
- const
Azure::Storage::Blobs::Models::BlobItem& blob) {
+ static FileInfo FileInfoFromBlob(std::string_view container,
+ const Blobs::Models::BlobItem& blob) {
auto path = internal::ConcatAbstractPath(container, blob.Name);
if (internal::HasTrailingSlash(blob.Name)) {
return DirectoryFileInfoFromPath(path);
@@ -852,7 +913,7 @@ class AzureFileSystem::Impl {
return info;
}
- static FileInfo DirectoryFileInfoFromPath(const std::string& path) {
+ static FileInfo DirectoryFileInfoFromPath(std::string_view path) {
return FileInfo{std::string{internal::RemoveTrailingSlash(path)},
FileType::Directory};
}
@@ -870,13 +931,13 @@ class AzureFileSystem::Impl {
/// \pre container_client is the client for the container named like the
first
/// segment of select.base_dir.
Status GetFileInfoWithSelectorFromContainer(
- const Azure::Storage::Blobs::BlobContainerClient& container_client,
- const Azure::Core::Context& context, Azure::Nullable<int32_t>
page_size_hint,
- const FileSelector& select, FileInfoVector* acc_results) {
+ const Blobs::BlobContainerClient& container_client, const Core::Context&
context,
+ Azure::Nullable<int32_t> page_size_hint, const FileSelector& select,
+ FileInfoVector* acc_results) {
ARROW_ASSIGN_OR_RAISE(auto base_location,
AzureLocation::FromString(select.base_dir));
bool found = false;
- Azure::Storage::Blobs::ListBlobsOptions options;
+ Blobs::ListBlobsOptions options;
if (internal::IsEmptyPath(base_location.path)) {
// If the base_dir is the root of the container, then we want to list
all blobs in
// the container and the Prefix should be empty and not even include the
trailing
@@ -887,7 +948,7 @@ class AzureFileSystem::Impl {
options.Prefix = internal::EnsureTrailingSlash(base_location.path);
}
options.PageSizeHint = page_size_hint;
- options.Include =
Azure::Storage::Blobs::Models::ListBlobsIncludeFlags::Metadata;
+ options.Include = Blobs::Models::ListBlobsIncludeFlags::Metadata;
auto recurse = [&](const std::string& blob_prefix) noexcept -> Status {
if (select.recursive && select.max_recursion > 0) {
@@ -903,15 +964,14 @@ class AzureFileSystem::Impl {
return Status::OK();
};
- auto process_blob =
- [&](const Azure::Storage::Blobs::Models::BlobItem& blob) noexcept {
- // blob.Name has trailing slash only when Prefix is an empty
- // directory marker blob for the directory we're listing
- // from, and we should skip it.
- if (!internal::HasTrailingSlash(blob.Name)) {
- acc_results->push_back(FileInfoFromBlob(base_location.container,
blob));
- }
- };
+ auto process_blob = [&](const Blobs::Models::BlobItem& blob) noexcept {
+ // blob.Name has trailing slash only when Prefix is an empty
+ // directory marker blob for the directory we're listing
+ // from, and we should skip it.
+ if (!internal::HasTrailingSlash(blob.Name)) {
+ acc_results->push_back(FileInfoFromBlob(base_location.container,
blob));
+ }
+ };
auto process_prefix = [&](const std::string& prefix) noexcept -> Status {
const auto path = internal::ConcatAbstractPath(base_location.container,
prefix);
acc_results->push_back(DirectoryFileInfoFromPath(path));
@@ -964,14 +1024,13 @@ class AzureFileSystem::Impl {
RETURN_NOT_OK(process_prefix(list_response.BlobPrefixes[blob_prefix_index]));
}
}
- } catch (const Azure::Storage::StorageException& exception) {
- if (exception.ErrorCode == "ContainerNotFound") {
+ } catch (const Storage::StorageException& exception) {
+ if (IsContainerNotFound(exception)) {
found = false;
} else {
- return internal::ExceptionToStatus(
- "Failed to list blobs in a directory: " + select.base_dir + ": " +
- container_client.GetUrl(),
- exception);
+ return ExceptionToStatus("Failed to list blobs in a directory: " +
+ select.base_dir + ": " +
container_client.GetUrl(),
+ exception);
}
}
@@ -981,7 +1040,7 @@ class AzureFileSystem::Impl {
}
public:
- Status GetFileInfoWithSelector(const Azure::Core::Context& context,
+ Status GetFileInfoWithSelector(const Core::Context& context,
Azure::Nullable<int32_t> page_size_hint,
const FileSelector& select,
FileInfoVector* acc_results) {
@@ -991,29 +1050,28 @@ class AzureFileSystem::Impl {
// Without a container, the base_location is equivalent to the filesystem
// root -- `/`. FileSelector::allow_not_found doesn't matter in this case
// because the root always exists.
- auto on_container =
- [&](const Azure::Storage::Blobs::Models::BlobContainerItem&
container) {
- // Deleted containers are not listed by ListContainers.
- DCHECK(!container.IsDeleted);
-
- // Every container is considered a directory.
- FileInfo info{container.Name, FileType::Directory};
- info.set_mtime(
-
std::chrono::system_clock::time_point{container.Details.LastModified});
- acc_results->push_back(std::move(info));
-
- // Recurse into containers (subdirectories) if requested.
- if (select.recursive && select.max_recursion > 0) {
- FileSelector sub_select;
- sub_select.base_dir = container.Name;
- sub_select.allow_not_found = true;
- sub_select.recursive = true;
- sub_select.max_recursion = select.max_recursion - 1;
- ARROW_RETURN_NOT_OK(GetFileInfoWithSelector(context,
page_size_hint,
- sub_select,
acc_results));
- }
- return Status::OK();
- };
+ auto on_container = [&](const Blobs::Models::BlobContainerItem&
container) {
+ // Deleted containers are not listed by ListContainers.
+ DCHECK(!container.IsDeleted);
+
+ // Every container is considered a directory.
+ FileInfo info{container.Name, FileType::Directory};
+ info.set_mtime(
+
std::chrono::system_clock::time_point{container.Details.LastModified});
+ acc_results->push_back(std::move(info));
+
+ // Recurse into containers (subdirectories) if requested.
+ if (select.recursive && select.max_recursion > 0) {
+ FileSelector sub_select;
+ sub_select.base_dir = container.Name;
+ sub_select.allow_not_found = true;
+ sub_select.recursive = true;
+ sub_select.max_recursion = select.max_recursion - 1;
+ ARROW_RETURN_NOT_OK(
+ GetFileInfoWithSelector(context, page_size_hint, sub_select,
acc_results));
+ }
+ return Status::OK();
+ };
return VisitContainers(context, std::move(on_container));
}
@@ -1026,7 +1084,7 @@ class AzureFileSystem::Impl {
Result<std::shared_ptr<ObjectInputFile>> OpenInputFile(const AzureLocation&
location,
AzureFileSystem* fs) {
RETURN_NOT_OK(ValidateFileLocation(location));
- auto blob_client = std::make_shared<Azure::Storage::Blobs::BlobClient>(
+ auto blob_client = std::make_shared<Blobs::BlobClient>(
blob_service_client_->GetBlobContainerClient(location.container)
.GetBlobClient(location.path));
@@ -1046,7 +1104,7 @@ class AzureFileSystem::Impl {
}
ARROW_ASSIGN_OR_RAISE(auto location,
AzureLocation::FromString(info.path()));
RETURN_NOT_OK(ValidateFileLocation(location));
- auto blob_client = std::make_shared<Azure::Storage::Blobs::BlobClient>(
+ auto blob_client = std::make_shared<Blobs::BlobClient>(
blob_service_client_->GetBlobContainerClient(location.container)
.GetBlobClient(location.path));
@@ -1070,19 +1128,18 @@ class AzureFileSystem::Impl {
return Status::OK();
} else {
return StatusFromErrorResponse(
- container_client.GetUrl(), response.RawResponse.get(),
+ container_client.GetUrl(), *response.RawResponse,
"Failed to create a container: " + location.container);
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to create a container: " + location.container + ": " +
- container_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to create a container: " +
location.container +
+ ": " + container_client.GetUrl(),
+ exception);
}
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(location.container));
+ hns_detector_.Enabled(location.container));
if (!hierarchical_namespace_enabled) {
// Without hierarchical namespace enabled Azure blob storage has no
directories.
// Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
@@ -1098,15 +1155,13 @@ class AzureFileSystem::Impl {
if (response.Value.Created) {
return Status::OK();
} else {
- return StatusFromErrorResponse(directory_client.GetUrl(),
- response.RawResponse.get(),
+ return StatusFromErrorResponse(directory_client.GetUrl(),
*response.RawResponse,
"Failed to create a directory: " +
location.path);
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to create a directory: " + location.path + ": " +
- directory_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to create a directory: " +
location.path + ": " +
+ directory_client.GetUrl(),
+ exception);
}
}
@@ -1119,15 +1174,14 @@ class AzureFileSystem::Impl {
blob_service_client_->GetBlobContainerClient(location.container);
try {
container_client.CreateIfNotExists();
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to create a container: " + location.container + " (" +
- container_client.GetUrl() + ")",
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to create a container: " +
location.container +
+ " (" + container_client.GetUrl() + ")",
+ exception);
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(location.container));
+ hns_detector_.Enabled(location.container));
if (!hierarchical_namespace_enabled) {
// Without hierarchical namespace enabled Azure blob storage has no
directories.
// Therefore we can't, and don't need to create one. Simply creating a
blob with `/`
@@ -1141,11 +1195,10 @@ class AzureFileSystem::Impl {
.GetDirectoryClient(location.path);
try {
directory_client.CreateIfNotExists();
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to create a directory: " + location.path + " (" +
- directory_client.GetUrl() + ")",
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to create a directory: " +
location.path + " (" +
+ directory_client.GetUrl() + ")",
+ exception);
}
}
@@ -1158,7 +1211,7 @@ class AzureFileSystem::Impl {
AzureFileSystem* fs) {
RETURN_NOT_OK(ValidateFileLocation(location));
- auto block_blob_client =
std::make_shared<Azure::Storage::Blobs::BlockBlobClient>(
+ auto block_blob_client = std::make_shared<Blobs::BlockBlobClient>(
blob_service_client_->GetBlobContainerClient(location.container)
.GetBlockBlobClient(location.path));
@@ -1180,7 +1233,7 @@ class AzureFileSystem::Impl {
bool missing_dir_ok) {
auto container_client =
blob_service_client_->GetBlobContainerClient(location.container);
- Azure::Storage::Blobs::ListBlobsOptions options;
+ Blobs::ListBlobsOptions options;
if (!location.path.empty()) {
options.Prefix = internal::EnsureTrailingSlash(location.path);
}
@@ -1200,19 +1253,17 @@ class AzureFileSystem::Impl {
continue;
}
auto batch = container_client.CreateBatch();
- std::vector<Azure::Storage::DeferredResponse<
- Azure::Storage::Blobs::Models::DeleteBlobResult>>
+ std::vector<Storage::DeferredResponse<Blobs::Models::DeleteBlobResult>>
deferred_responses;
for (const auto& blob_item : list_response.Blobs) {
deferred_responses.push_back(batch.DeleteBlob(blob_item.Name));
}
try {
container_client.SubmitBatch(batch);
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to delete blobs in a directory: " + location.path + ": "
+
- container_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to delete blobs in a directory: " +
+ location.path + ": " +
container_client.GetUrl(),
+ exception);
}
std::vector<std::string> failed_blob_names;
for (size_t i = 0; i < deferred_responses.size(); ++i) {
@@ -1221,7 +1272,7 @@ class AzureFileSystem::Impl {
try {
auto delete_result = deferred_response.GetResponse();
success = delete_result.Value.Deleted;
- } catch (const Azure::Storage::StorageException& exception) {
+ } catch (const Storage::StorageException& exception) {
success = false;
}
if (!success) {
@@ -1240,11 +1291,10 @@ class AzureFileSystem::Impl {
}
}
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to list blobs in a directory: " + location.path + ": " +
- container_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to list blobs in a directory: " +
location.path +
+ ": " + container_client.GetUrl(),
+ exception);
}
return Status::OK();
}
@@ -1264,19 +1314,18 @@ class AzureFileSystem::Impl {
return Status::OK();
} else {
return StatusFromErrorResponse(
- container_client.GetUrl(), response.RawResponse.get(),
+ container_client.GetUrl(), *response.RawResponse,
"Failed to delete a container: " + location.container);
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to delete a container: " + location.container + ": " +
- container_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to delete a container: " +
location.container +
+ ": " + container_client.GetUrl(),
+ exception);
}
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(location.container));
+ hns_detector_.Enabled(location.container));
if (hierarchical_namespace_enabled) {
auto directory_client =
datalake_service_client_->GetFileSystemClient(location.container)
@@ -1287,14 +1336,13 @@ class AzureFileSystem::Impl {
return Status::OK();
} else {
return StatusFromErrorResponse(
- directory_client.GetUrl(), response.RawResponse.get(),
+ directory_client.GetUrl(), *response.RawResponse,
"Failed to delete a directory: " + location.path);
}
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
- "Failed to delete a directory: " + location.path + ": " +
- directory_client.GetUrl(),
- exception);
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus("Failed to delete a directory: " +
location.path + ": " +
+ directory_client.GetUrl(),
+ exception);
}
} else {
return DeleteDirContentsWithoutHierarchicalNamespace(location,
@@ -1308,7 +1356,7 @@ class AzureFileSystem::Impl {
}
ARROW_ASSIGN_OR_RAISE(auto hierarchical_namespace_enabled,
- hierarchical_namespace_.Enabled(location.container));
+ hns_detector_.Enabled(location.container));
if (hierarchical_namespace_enabled) {
auto file_system_client =
datalake_service_client_->GetFileSystemClient(location.container);
@@ -1322,8 +1370,8 @@ class AzureFileSystem::Impl {
file_system_client.GetDirectoryClient(path.Name);
try {
sub_directory_client.DeleteRecursive();
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"Failed to delete a sub directory: " + location.container +
internal::kSep + path.Name + ": " +
sub_directory_client.GetUrl(),
exception);
@@ -1332,8 +1380,8 @@ class AzureFileSystem::Impl {
auto sub_file_client =
file_system_client.GetFileClient(path.Name);
try {
sub_file_client.Delete();
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"Failed to delete a sub file: " + location.container +
internal::kSep + path.Name + ": " +
sub_file_client.GetUrl(),
exception);
@@ -1341,15 +1389,13 @@ class AzureFileSystem::Impl {
}
}
}
- } catch (const Azure::Storage::StorageException& exception) {
- if (missing_dir_ok &&
- exception.StatusCode ==
Azure::Core::Http::HttpStatusCode::NotFound) {
+ } catch (const Storage::StorageException& exception) {
+ if (missing_dir_ok && exception.StatusCode ==
Http::HttpStatusCode::NotFound) {
return Status::OK();
} else {
- return internal::ExceptionToStatus(
- "Failed to delete directory contents: " + location.path + ": " +
- directory_client.GetUrl(),
- exception);
+ return ExceptionToStatus("Failed to delete directory contents: " +
+ location.path + ": " +
directory_client.GetUrl(),
+ exception);
}
}
return Status::OK();
@@ -1371,8 +1417,8 @@ class AzureFileSystem::Impl {
.GetUrl();
try {
dest_blob_client.CopyFromUri(src_url);
- } catch (const Azure::Storage::StorageException& exception) {
- return internal::ExceptionToStatus(
+ } catch (const Storage::StorageException& exception) {
+ return ExceptionToStatus(
"Failed to copy a blob. (" + src_url + " -> " +
dest_blob_client.GetUrl() + ")",
exception);
}
@@ -1380,6 +1426,17 @@ class AzureFileSystem::Impl {
}
};
+AzureFileSystem::AzureFileSystem(std::unique_ptr<Impl>&& impl)
+ : FileSystem(impl->io_context()), impl_(std::move(impl)) {
+ default_async_is_sync_ = false;
+}
+
+Result<std::shared_ptr<AzureFileSystem>> AzureFileSystem::Make(
+ const AzureOptions& options, const io::IOContext& io_context) {
+ ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options,
io_context));
+ return std::shared_ptr<AzureFileSystem>(new
AzureFileSystem(std::move(impl)));
+}
+
const AzureOptions& AzureFileSystem::options() const { return
impl_->options(); }
bool AzureFileSystem::Equals(const FileSystem& other) const {
@@ -1399,7 +1456,7 @@ Result<FileInfo> AzureFileSystem::GetFileInfo(const
std::string& path) {
}
Result<FileInfoVector> AzureFileSystem::GetFileInfo(const FileSelector&
select) {
- Azure::Core::Context context;
+ Core::Context context;
Azure::Nullable<int32_t> page_size_hint; // unspecified
FileInfoVector results;
RETURN_NOT_OK(
@@ -1478,18 +1535,4 @@ Result<std::shared_ptr<io::OutputStream>>
AzureFileSystem::OpenAppendStream(
return impl_->OpenAppendStream(location, metadata, false, this);
}
-Result<std::shared_ptr<AzureFileSystem>> AzureFileSystem::Make(
- const AzureOptions& options, const io::IOContext& io_context) {
- std::shared_ptr<AzureFileSystem> ptr(new AzureFileSystem(options,
io_context));
- RETURN_NOT_OK(ptr->impl_->Init());
- return ptr;
-}
-
-AzureFileSystem::AzureFileSystem(const AzureOptions& options,
- const io::IOContext& io_context)
- : FileSystem(io_context), impl_(std::make_unique<Impl>(options,
io_context)) {
- default_async_is_sync_ = false;
-}
-
-} // namespace fs
-} // namespace arrow
+} // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs.h
b/cpp/src/arrow/filesystem/azurefs.h
index b2865b059e..1266aa2d02 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -25,90 +25,118 @@
#include "arrow/util/macros.h"
#include "arrow/util/uri.h"
-namespace Azure {
-namespace Core {
-namespace Credentials {
-
+namespace Azure::Core::Credentials {
class TokenCredential;
+}
-} // namespace Credentials
-} // namespace Core
-namespace Storage {
-
+namespace Azure::Storage {
class StorageSharedKeyCredential;
+}
-} // namespace Storage
-} // namespace Azure
-
-namespace arrow {
-namespace fs {
-
-enum class AzureCredentialsKind : int8_t {
- /// Anonymous access (no credentials used), public
- Anonymous,
- /// Use explicitly-provided access key pair
- StorageCredentials,
- /// Use ServicePrincipleCredentials
- ServicePrincipleCredentials,
- /// Use Sas Token to authenticate
- Sas,
- /// Use Connection String
- ConnectionString
-};
+namespace Azure::Storage::Blobs {
+class BlobServiceClient;
+}
+
+namespace Azure::Storage::Files::DataLake {
+class DataLakeServiceClient;
+}
-enum class AzureBackend : bool {
- /// Official Azure Remote Backend
- Azure,
- /// Local Simulated Storage
- Azurite
+namespace arrow::fs {
+
+enum class AzureBackend {
+ /// \brief Official Azure Remote Backend
+ kAzure,
+ /// \brief Local Simulated Storage
+ kAzurite
};
/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
- std::string account_dfs_url;
- std::string account_blob_url;
- AzureBackend backend = AzureBackend::Azure;
- AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
+ /// \brief The backend to connect to: Azure or Azurite (for testing).
+ AzureBackend backend = AzureBackend::kAzure;
- std::string sas_token;
- std::string connection_string;
- std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
- storage_credentials_provider;
- std::shared_ptr<Azure::Core::Credentials::TokenCredential>
- service_principle_credentials_provider;
+ // TODO(GH-38598): Add support for more auth methods.
+ // std::string connection_string;
+ // std::string sas_token;
/// \brief Default metadata for OpenOutputStream.
///
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
std::shared_ptr<const KeyValueMetadata> default_metadata;
+ private:
+ std::string account_blob_url_;
+ std::string account_dfs_url_;
+
+ enum class CredentialKind {
+ kAnonymous,
+ kStorageSharedKeyCredential,
+ } credential_kind_ = CredentialKind::kAnonymous;
+
+ std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
+ storage_shared_key_credential_;
+
+ public:
AzureOptions();
+ ~AzureOptions();
- Status ConfigureAccountKeyCredentials(const std::string& account_name,
- const std::string& account_key);
+ Status ConfigureAccountKeyCredential(const std::string& account_name,
+ const std::string& account_key);
bool Equals(const AzureOptions& other) const;
+
+ const std::string& AccountBlobUrl() const { return account_blob_url_; }
+ const std::string& AccountDfsUrl() const { return account_dfs_url_; }
+
+ Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
+ MakeBlobServiceClient() const;
+
+
Result<std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>>
+ MakeDataLakeServiceClient() const;
};
-/// \brief Azure-backed FileSystem implementation for ABFS and ADLS.
+/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and
+/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2].
+///
+/// ADLS Gen2 isn't a dedicated service or account type. It's a set of
capabilities that
+/// support high throughput analytic workloads, built on Azure Blob Storage.
All the data
+/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage
account.
+/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop
+/// compatibility. ADLS Gen1 exists as a separate object that will retired on
2024-02-29
+/// and new ADLS accounts use Gen2 instead.
///
-/// ABFS (Azure Blob Storage -
https://azure.microsoft.com/en-us/products/storage/blobs/)
-/// object-based cloud storage system.
+/// ADLS Gen2 and Blob APIs can operate on the same data, but there are
+/// some limitations [3]. The ones that are relevant to this
+/// implementation are listed here:
///
-/// ADLS (Azure Data Lake Storage -
-/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/)
-/// is a scalable data storage system designed for big-data applications.
-/// ADLS provides filesystem semantics, file-level security, and Hadoop
-/// compatibility. Gen1 exists as a separate object that will retired
-/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is
-/// implemented on top of ABFS.
+/// - You can't use Blob APIs, and ADLS APIs to write to the same instance of
a file. If
+/// you write to a file by using ADLS APIs then that file's blocks won't be
visible
+/// to calls to the GetBlockList Blob API. The only exception is when you're
+/// overwriting.
+/// - When you use the ListBlobs operation without specifying a delimiter, the
results
+/// include both directories and blobs. If you choose to use a delimiter,
use only a
+/// forward slash (/) -- the only supported delimiter.
+/// - If you use the DeleteBlob API to delete a directory, that directory is
deleted only
+/// if it's empty. This means that you can't use the Blob API delete
directories
+/// recursively.
///
-/// TODO: GH-18014 Complete the internal implementation
-/// and review the documentation
+/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs
+/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage
+/// [3]:
+///
https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues
class ARROW_EXPORT AzureFileSystem : public FileSystem {
+ private:
+ class Impl;
+ std::unique_ptr<Impl> impl_;
+
+ explicit AzureFileSystem(std::unique_ptr<Impl>&& impl);
+
public:
~AzureFileSystem() override = default;
+ static Result<std::shared_ptr<AzureFileSystem>> Make(
+ const AzureOptions& options, const io::IOContext& =
io::default_io_context());
+
std::string type_name() const override { return "abfs"; }
/// Return the original Azure options when constructing the filesystem
@@ -152,16 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;
-
- static Result<std::shared_ptr<AzureFileSystem>> Make(
- const AzureOptions& options, const io::IOContext& =
io::default_io_context());
-
- private:
- AzureFileSystem(const AzureOptions& options, const io::IOContext&
io_context);
-
- class Impl;
- std::unique_ptr<Impl> impl_;
};
-} // namespace fs
-} // namespace arrow
+} // namespace arrow::fs
diff --git a/cpp/src/arrow/filesystem/azurefs_internal.cc
b/cpp/src/arrow/filesystem/azurefs_internal.cc
index 3e545d670c..39c3fb23e3 100644
--- a/cpp/src/arrow/filesystem/azurefs_internal.cc
+++ b/cpp/src/arrow/filesystem/azurefs_internal.cc
@@ -23,11 +23,17 @@
namespace arrow::fs::internal {
+namespace {
+
+// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to
+// azurefs.cc (which contains a private copy of this helper function already).
Status ExceptionToStatus(const std::string& prefix,
const Azure::Storage::StorageException& exception) {
return Status::IOError(prefix, " Azure Error: ", exception.what());
}
+} // namespace
+
Status HierarchicalNamespaceDetector::Init(
Azure::Storage::Files::DataLake::DataLakeServiceClient*
datalake_service_client) {
datalake_service_client_ = datalake_service_client;
diff --git a/cpp/src/arrow/filesystem/azurefs_internal.h
b/cpp/src/arrow/filesystem/azurefs_internal.h
index c3da96239a..92592cf164 100644
--- a/cpp/src/arrow/filesystem/azurefs_internal.h
+++ b/cpp/src/arrow/filesystem/azurefs_internal.h
@@ -25,9 +25,6 @@
namespace arrow::fs::internal {
-Status ExceptionToStatus(const std::string& prefix,
- const Azure::Storage::StorageException& exception);
-
class HierarchicalNamespaceDetector {
public:
Status Init(
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index 792c63b209..463ff4e8da 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -36,6 +36,7 @@
#include "arrow/filesystem/azurefs.h"
#include "arrow/filesystem/azurefs_internal.h"
+#include <memory>
#include <random>
#include <string>
@@ -63,6 +64,7 @@
namespace arrow {
using internal::TemporaryDir;
namespace fs {
+using internal::ConcatAbstractPath;
namespace {
namespace bp = boost::process;
@@ -71,56 +73,133 @@ using ::testing::Not;
using ::testing::NotNull;
namespace Blobs = Azure::Storage::Blobs;
-namespace Files = Azure::Storage::Files;
+namespace Core = Azure::Core;
+namespace DataLake = Azure::Storage::Files::DataLake;
-auto const* kLoremIpsum = R"""(
-Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
-incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
-nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
-Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
-fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
-culpa qui officia deserunt mollit anim id est laborum.
-)""";
+class BaseAzureEnv : public ::testing::Environment {
+ protected:
+ std::string account_name_;
+ std::string account_key_;
+
+ BaseAzureEnv(std::string account_name, std::string account_key)
+ : account_name_(std::move(account_name)),
account_key_(std::move(account_key)) {}
-class AzuriteEnv : public ::testing::Environment {
public:
- AzuriteEnv() {
- account_name_ = "devstoreaccount1";
- account_key_ =
-
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/"
- "KBHBeksoGMGw==";
- auto exe_path = bp::search_path("azurite");
- if (exe_path.empty()) {
- auto error = std::string("Could not find Azurite emulator.");
- status_ = Status::Invalid(error);
- return;
+ const std::string& account_name() const { return account_name_; }
+ const std::string& account_key() const { return account_key_; }
+
+ virtual AzureBackend backend() const = 0;
+
+ virtual bool WithHierarchicalNamespace() const { return false; }
+
+ virtual Result<int64_t> GetDebugLogSize() { return 0; }
+ virtual Status DumpDebugLog(int64_t position) {
+ return Status::NotImplemented("BaseAzureEnv::DumpDebugLog");
+ }
+};
+
+template <class AzureEnvClass>
+class AzureEnvImpl : public BaseAzureEnv {
+ private:
+ /// \brief Factory function that registers the singleton instance as a
global test
+ /// environment. Must be called only once per implementation (see
GetInstance()).
+ ///
+ /// Every BaseAzureEnv implementation defines a static and parameter-less
member
+ /// function called Make() that returns a
Result<std::unique_ptr<BaseAzureEnv>>.
+ /// This templated function performs the following steps:
+ ///
+ /// 1) Calls AzureEnvClass::Make() to get an instance of AzureEnvClass.
+ /// 2) Passes ownership of the AzureEnvClass instance to the testing
environment.
+ /// 3) Returns a Result<BaseAzureEnv*> wrapping the raw heap-allocated
pointer.
+ static Result<BaseAzureEnv*> MakeAndAddToGlobalTestEnvironment() {
+ ARROW_ASSIGN_OR_RAISE(auto env, AzureEnvClass::Make());
+ auto* heap_ptr = env.release();
+ ::testing::AddGlobalTestEnvironment(heap_ptr);
+ return heap_ptr;
+ }
+
+ protected:
+ using BaseAzureEnv::BaseAzureEnv;
+
+ /// \brief Create an AzureEnvClass instance from environment variables.
+ ///
+ /// Reads the account name and key from the environment variables. This can
be
+ /// used in BaseAzureEnv implementations that don't need to do any additional
+ /// setup to create the singleton instance (e.g. AzureFlatNSEnv,
+ /// AzureHierarchicalNSEnv).
+ static Result<std::unique_ptr<AzureEnvClass>> MakeFromEnvVars(
+ const std::string& account_name_var, const std::string& account_key_var)
{
+ const auto account_name = std::getenv(account_name_var.c_str());
+ const auto account_key = std::getenv(account_key_var.c_str());
+ if (!account_name && !account_key) {
+ return Status::Cancelled(account_name_var + " and " + account_key_var +
+ " are not set. Skipping tests.");
}
- auto temp_dir_ = *TemporaryDir::Make("azurefs-test-");
- auto debug_log_path_result = temp_dir_->path().Join("debug.log");
- if (!debug_log_path_result.ok()) {
- status_ = debug_log_path_result.status();
- return;
+ // If only one of the variables is set. Don't cancel tests,
+ // fail with a Status::Invalid.
+ if (!account_name) {
+ return Status::Invalid(account_name_var + " not set while " +
account_key_var +
+ " is set.");
}
- debug_log_path_ = *debug_log_path_result;
- server_process_ =
- bp::child(boost::this_process::environment(), exe_path, "--silent",
"--location",
- temp_dir_->path().ToString(), "--debug",
debug_log_path_.ToString());
- if (!(server_process_.valid() && server_process_.running())) {
- auto error = "Could not start Azurite emulator.";
- server_process_.terminate();
- server_process_.wait();
- status_ = Status::Invalid(error);
- return;
+ if (!account_key) {
+ return Status::Invalid(account_key_var + " not set while " +
account_name_var +
+ " is set.");
}
- status_ = Status::OK();
+ return std::unique_ptr<AzureEnvClass>{new AzureEnvClass(account_name,
account_key)};
}
+ public:
+ static Result<BaseAzureEnv*> GetInstance() {
+ // Ensure MakeAndAddToGlobalTestEnvironment() is called only once by
storing the
+ // Result<BaseAzureEnv*> in a static variable.
+ static auto singleton_env = MakeAndAddToGlobalTestEnvironment();
+ return singleton_env;
+ }
+
+ AzureBackend backend() const final { return AzureEnvClass::kBackend; }
+};
+
+class AzuriteEnv : public AzureEnvImpl<AzuriteEnv> {
+ private:
+ std::unique_ptr<TemporaryDir> temp_dir_;
+ arrow::internal::PlatformFilename debug_log_path_;
+ bp::child server_process_;
+
+ using AzureEnvImpl::AzureEnvImpl;
+
+ public:
+ static const AzureBackend kBackend = AzureBackend::kAzurite;
+
~AzuriteEnv() override {
server_process_.terminate();
server_process_.wait();
}
- Result<int64_t> GetDebugLogSize() {
+ static Result<std::unique_ptr<AzureEnvImpl>> Make() {
+ auto self = std::unique_ptr<AzuriteEnv>(
+ new AzuriteEnv("devstoreaccount1",
+
"Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/"
+ "K1SZFPTOtr/KBHBeksoGMGw=="));
+ auto exe_path = bp::search_path("azurite");
+ if (exe_path.empty()) {
+ return Status::Invalid("Could not find Azurite emulator.");
+ }
+ ARROW_ASSIGN_OR_RAISE(self->temp_dir_,
TemporaryDir::Make("azurefs-test-"));
+ ARROW_ASSIGN_OR_RAISE(self->debug_log_path_,
+ self->temp_dir_->path().Join("debug.log"));
+ auto server_process = bp::child(
+ boost::this_process::environment(), exe_path, "--silent", "--location",
+ self->temp_dir_->path().ToString(), "--debug",
self->debug_log_path_.ToString());
+ if (!server_process.valid() || !server_process.running()) {
+ server_process.terminate();
+ server_process.wait();
+ return Status::Invalid("Could not start Azurite emulator.");
+ }
+ self->server_process_ = std::move(server_process);
+ return self;
+ }
+
+ Result<int64_t> GetDebugLogSize() override {
ARROW_ASSIGN_OR_RAISE(auto exists,
arrow::internal::FileExists(debug_log_path_));
if (!exists) {
return 0;
@@ -131,7 +210,7 @@ class AzuriteEnv : public ::testing::Environment {
return arrow::internal::FileTell(file_descriptor.fd());
}
- Status DumpDebugLog(int64_t position = 0) {
+ Status DumpDebugLog(int64_t position) override {
ARROW_ASSIGN_OR_RAISE(auto exists,
arrow::internal::FileExists(debug_log_path_));
if (!exists) {
return Status::OK();
@@ -157,25 +236,35 @@ class AzuriteEnv : public ::testing::Environment {
std::cerr << std::endl;
return Status::OK();
}
+};
- const std::string& account_name() const { return account_name_; }
- const std::string& account_key() const { return account_key_; }
- const Status status() const { return status_; }
-
+class AzureFlatNSEnv : public AzureEnvImpl<AzureFlatNSEnv> {
private:
- std::string account_name_;
- std::string account_key_;
- bp::child server_process_;
- Status status_;
- std::unique_ptr<TemporaryDir> temp_dir_;
- arrow::internal::PlatformFilename debug_log_path_;
+ using AzureEnvImpl::AzureEnvImpl;
+
+ public:
+ static const AzureBackend kBackend = AzureBackend::kAzure;
+
+ static Result<std::unique_ptr<AzureFlatNSEnv>> Make() {
+ return MakeFromEnvVars("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME",
+ "AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
+ }
};
-auto* azurite_env = ::testing::AddGlobalTestEnvironment(new AzuriteEnv);
+class AzureHierarchicalNSEnv : public AzureEnvImpl<AzureHierarchicalNSEnv> {
+ private:
+ using AzureEnvImpl::AzureEnvImpl;
-AzuriteEnv* GetAzuriteEnv() {
- return ::arrow::internal::checked_cast<AzuriteEnv*>(azurite_env);
-}
+ public:
+ static const AzureBackend kBackend = AzureBackend::kAzure;
+
+ static Result<std::unique_ptr<AzureHierarchicalNSEnv>> Make() {
+ return MakeFromEnvVars("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME",
+ "AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
+ }
+
+ bool WithHierarchicalNamespace() const final { return true; }
+};
// Placeholder tests
// TODO: GH-18014 Remove once a proper test is added
@@ -193,44 +282,110 @@ TEST(AzureFileSystem, OptionsCompare) {
EXPECT_TRUE(options.Equals(options));
}
-class AzureFileSystemTest : public ::testing::Test {
+struct PreexistingData {
+ public:
+ using RNG = std::mt19937_64;
+
+ public:
+ const std::string container_name;
+ static constexpr char const* kObjectName = "test-object-name";
+
+ static constexpr char const* kLoremIpsum = R"""(
+Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis
+nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
+Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in
+culpa qui officia deserunt mollit anim id est laborum.
+)""";
+
public:
+ explicit PreexistingData(RNG& rng) :
container_name{RandomContainerName(rng)} {}
+
+ // Creates a path by concatenating the container name and the stem.
+ std::string ContainerPath(std::string_view stem) const {
+ return ConcatAbstractPath(container_name, stem);
+ }
+
+ std::string ObjectPath() const { return ContainerPath(kObjectName); }
+ std::string NotFoundObjectPath() const { return ContainerPath("not-found"); }
+
+ std::string RandomDirectoryPath(RNG& rng) const {
+ return ContainerPath(RandomChars(32, rng));
+ }
+
+ // Utilities
+ static std::string RandomContainerName(RNG& rng) { return RandomChars(32,
rng); }
+
+ static std::string RandomChars(int count, RNG& rng) {
+ auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
+ std::uniform_int_distribution<int> d(0, static_cast<int>(fillers.size()) -
1);
+ std::string s;
+ std::generate_n(std::back_inserter(s), count, [&] { return
fillers[d(rng)]; });
+ return s;
+ }
+
+ static int RandomIndex(int end, RNG& rng) {
+ return std::uniform_int_distribution<int>(0, end - 1)(rng);
+ }
+
+ static std::string RandomLine(int lineno, int width, RNG& rng) {
+ auto line = std::to_string(lineno) + ": ";
+ line += RandomChars(width - static_cast<int>(line.size()) - 1, rng);
+ line += '\n';
+ return line;
+ }
+};
+
+class TestAzureFileSystem : public ::testing::Test {
+ protected:
+ // Set in constructor
+ std::mt19937_64 rng_;
+
+ // Set in SetUp()
+ int64_t debug_log_start_ = 0;
+ bool set_up_succeeded_ = false;
+ AzureOptions options_;
+
std::shared_ptr<FileSystem> fs_;
std::unique_ptr<Blobs::BlobServiceClient> blob_service_client_;
- std::unique_ptr<Files::DataLake::DataLakeServiceClient>
datalake_service_client_;
- AzureOptions options_;
- std::mt19937_64 generator_;
- std::string container_name_;
- bool suite_skipped_ = false;
+ std::unique_ptr<DataLake::DataLakeServiceClient> datalake_service_client_;
+
+ public:
+ TestAzureFileSystem() : rng_(std::random_device()()) {}
- AzureFileSystemTest() : generator_(std::random_device()()) {}
+ virtual Result<BaseAzureEnv*> GetAzureEnv() const = 0;
- virtual Result<AzureOptions> MakeOptions() = 0;
+ static Result<AzureOptions> MakeOptions(BaseAzureEnv* env) {
+ AzureOptions options;
+ options.backend = env->backend();
+ ARROW_EXPECT_OK(
+ options.ConfigureAccountKeyCredential(env->account_name(),
env->account_key()));
+ return options;
+ }
void SetUp() override {
- auto options = MakeOptions();
- if (options.ok()) {
- options_ = *options;
+ auto make_options = [this]() -> Result<AzureOptions> {
+ ARROW_ASSIGN_OR_RAISE(auto env, GetAzureEnv());
+ EXPECT_THAT(env, NotNull());
+ ARROW_ASSIGN_OR_RAISE(debug_log_start_, env->GetDebugLogSize());
+ return MakeOptions(env);
+ };
+ auto options_res = make_options();
+ if (options_res.status().IsCancelled()) {
+ GTEST_SKIP() << options_res.status().message();
} else {
- suite_skipped_ = true;
- GTEST_SKIP() << options.status().message();
+ EXPECT_OK_AND_ASSIGN(options_, options_res);
}
- // Stop-gap solution before GH-39119 is fixed.
- container_name_ = "z" + RandomChars(31);
- blob_service_client_ = std::make_unique<Blobs::BlobServiceClient>(
- options_.account_blob_url, options_.storage_credentials_provider);
- datalake_service_client_ =
std::make_unique<Files::DataLake::DataLakeServiceClient>(
- options_.account_dfs_url, options_.storage_credentials_provider);
- ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_));
- auto container_client = CreateContainer(container_name_);
- auto blob_client =
container_client.GetBlockBlobClient(PreexistingObjectName());
- blob_client.UploadFrom(reinterpret_cast<const uint8_t*>(kLoremIpsum),
- strlen(kLoremIpsum));
+ ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_));
+ EXPECT_OK_AND_ASSIGN(blob_service_client_,
options_.MakeBlobServiceClient());
+ EXPECT_OK_AND_ASSIGN(datalake_service_client_,
options_.MakeDataLakeServiceClient());
+ set_up_succeeded_ = true;
}
void TearDown() override {
- if (!suite_skipped_) {
+ if (set_up_succeeded_) {
auto containers = blob_service_client_->ListBlobContainers();
for (auto container : containers.BlobContainers) {
auto container_client =
@@ -238,6 +393,13 @@ class AzureFileSystemTest : public ::testing::Test {
container_client.DeleteIfExists();
}
}
+ if (HasFailure()) {
+ // XXX: This may not include all logs in the target test because
+ // Azurite doesn't flush debug logs immediately... You may want
+ // to check the log manually...
+ EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+ ARROW_IGNORE_EXPR(env->DumpDebugLog(debug_log_start_));
+ }
}
Blobs::BlobContainerClient CreateContainer(const std::string& name) {
@@ -254,54 +416,20 @@ class AzureFileSystemTest : public ::testing::Test {
return blob_client;
}
- std::string PreexistingContainerName() const { return container_name_; }
-
- std::string PreexistingContainerPath() const {
- return PreexistingContainerName() + '/';
- }
-
- static std::string PreexistingObjectName() { return "test-object-name"; }
-
- std::string PreexistingObjectPath() const {
- return PreexistingContainerPath() + PreexistingObjectName();
- }
-
- std::string NotFoundObjectPath() { return PreexistingContainerPath() +
"not-found"; }
-
- std::string RandomLine(int lineno, std::size_t width) {
- auto line = std::to_string(lineno) + ": ";
- line += RandomChars(width - line.size() - 1);
- line += '\n';
- return line;
- }
-
- std::size_t RandomIndex(std::size_t end) {
- return std::uniform_int_distribution<std::size_t>(0, end - 1)(generator_);
- }
-
- std::string RandomChars(std::size_t count) {
- auto const fillers = std::string("abcdefghijlkmnopqrstuvwxyz0123456789");
- std::uniform_int_distribution<std::size_t> d(0, fillers.size() - 1);
- std::string s;
- std::generate_n(std::back_inserter(s), count, [&] { return
fillers[d(generator_)]; });
- return s;
- }
-
- std::string RandomContainerName() { return RandomChars(32); }
-
- std::string RandomDirectoryName() { return RandomChars(32); }
-
- void UploadLines(const std::vector<std::string>& lines, const char*
path_to_file,
+ void UploadLines(const std::vector<std::string>& lines, const std::string&
path,
int total_size) {
- const auto path = PreexistingContainerPath() + path_to_file;
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
const auto all_lines = std::accumulate(lines.begin(), lines.end(),
std::string(""));
ASSERT_OK(output->Write(all_lines));
ASSERT_OK(output->Close());
}
- void RunGetFileInfoObjectWithNestedStructureTest();
- void RunGetFileInfoObjectTest();
+ PreexistingData SetUpPreexistingData() {
+ PreexistingData data(rng_);
+ auto container_client = CreateContainer(data.container_name);
+ CreateBlob(container_client, data.kObjectName,
PreexistingData::kLoremIpsum);
+ return data;
+ }
struct HierarchicalPaths {
std::string container;
@@ -310,15 +438,12 @@ class AzureFileSystemTest : public ::testing::Test {
};
// Need to use "void" as the return type to use ASSERT_* in this method.
- void CreateHierarchicalData(HierarchicalPaths& paths) {
- const auto container_path = RandomContainerName();
- const auto directory_path =
- internal::ConcatAbstractPath(container_path, RandomDirectoryName());
- const auto sub_directory_path =
- internal::ConcatAbstractPath(directory_path, "new-sub");
- const auto sub_blob_path =
- internal::ConcatAbstractPath(sub_directory_path, "sub.txt");
- const auto top_blob_path = internal::ConcatAbstractPath(directory_path,
"top.txt");
+ void CreateHierarchicalData(HierarchicalPaths* paths) {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ const auto sub_directory_path = ConcatAbstractPath(directory_path,
"new-sub");
+ const auto sub_blob_path = ConcatAbstractPath(sub_directory_path,
"sub.txt");
+ const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt");
ASSERT_OK(fs_->CreateDir(sub_directory_path, true));
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path));
ASSERT_OK(output->Write(std::string_view("sub")));
@@ -327,15 +452,15 @@ class AzureFileSystemTest : public ::testing::Test {
ASSERT_OK(output->Write(std::string_view("top")));
ASSERT_OK(output->Close());
- AssertFileInfo(fs_.get(), container_path, FileType::Directory);
+ AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
AssertFileInfo(fs_.get(), directory_path, FileType::Directory);
AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory);
AssertFileInfo(fs_.get(), sub_blob_path, FileType::File);
AssertFileInfo(fs_.get(), top_blob_path, FileType::File);
- paths.container = container_path;
- paths.directory = directory_path;
- paths.sub_paths = {
+ paths->container = data.container_name;
+ paths->directory = directory_path;
+ paths->sub_paths = {
sub_directory_path,
sub_blob_path,
top_blob_path,
@@ -362,7 +487,7 @@ class AzureFileSystemTest : public ::testing::Test {
}
void AssertInfoAllContainersRecursive(const std::vector<FileInfo>& infos) {
- ASSERT_EQ(infos.size(), 14);
+ ASSERT_EQ(infos.size(), 12);
AssertFileInfo(infos[0], "container", FileType::Directory);
AssertFileInfo(infos[1], "container/emptydir", FileType::Directory);
AssertFileInfo(infos[2], "container/otherdir", FileType::Directory);
@@ -377,202 +502,336 @@ class AzureFileSystemTest : public ::testing::Test {
strlen(kSubData));
AssertFileInfo(infos[10], "container/somefile", FileType::File,
strlen(kSomeData));
AssertFileInfo(infos[11], "empty-container", FileType::Directory);
- AssertFileInfo(infos[12], PreexistingContainerName(), FileType::Directory);
- AssertFileInfo(infos[13], PreexistingObjectPath(), FileType::File);
}
-};
-class AzuriteFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- EXPECT_THAT(GetAzuriteEnv(), NotNull());
- ARROW_EXPECT_OK(GetAzuriteEnv()->status());
- ARROW_ASSIGN_OR_RAISE(debug_log_start_,
GetAzuriteEnv()->GetDebugLogSize());
- AzureOptions options;
- options.backend = AzureBackend::Azurite;
- ARROW_EXPECT_OK(options.ConfigureAccountKeyCredentials(
- GetAzuriteEnv()->account_name(), GetAzuriteEnv()->account_key()));
- return options;
+ bool WithHierarchicalNamespace() const {
+ EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv());
+ return env->WithHierarchicalNamespace();
}
- void TearDown() override {
- AzureFileSystemTest::TearDown();
- if (HasFailure()) {
- // XXX: This may not include all logs in the target test because
- // Azurite doesn't flush debug logs immediately... You may want
- // to check the log manually...
- ARROW_IGNORE_EXPR(GetAzuriteEnv()->DumpDebugLog(debug_log_start_));
+ // Tests that are called from more than one implementation of
TestAzureFileSystem
+
+ void TestDetectHierarchicalNamespace();
+ void TestGetFileInfoObject();
+ void TestGetFileInfoObjectWithNestedStructure();
+
+ void TestDeleteDirSuccessEmpty() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+
+ if (WithHierarchicalNamespace()) {
+ ASSERT_OK(fs_->CreateDir(directory_path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path,
FileType::Directory);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() and DeleteDir() do nothing.
+ ASSERT_OK(fs_->CreateDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
}
}
- int64_t debug_log_start_ = 0;
-};
+ void TestCreateDirSuccessContainerAndDirectory() {
+ auto data = SetUpPreexistingData();
+ const auto path = data.RandomDirectoryPath(rng_);
+ ASSERT_OK(fs_->CreateDir(path, false));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ }
+ }
-class AzureFlatNamespaceFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- AzureOptions options;
- const auto account_key = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_KEY");
- const auto account_name = std::getenv("AZURE_FLAT_NAMESPACE_ACCOUNT_NAME");
- if (account_key && account_name) {
- RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
- return options;
+ void TestCreateDirRecursiveSuccessContainerOnly() {
+ auto container_name = PreexistingData::RandomContainerName(rng_);
+ ASSERT_OK(fs_->CreateDir(container_name, true));
+ arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+ }
+
+ void TestCreateDirRecursiveSuccessDirectoryOnly() {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
}
- return Status::Cancelled(
- "Connection details not provided for a real flat namespace "
- "account.");
}
-};
-// How to enable this test:
-//
-// You need an Azure account. You should be able to create a free
-// account at https://azure.microsoft.com/en-gb/free/ . You should be
-// able to create a storage account through the portal Web UI.
-//
-// See also the official document how to create a storage account:
-//
https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
-//
-// A few suggestions on configuration:
-//
-// * Use Standard general-purpose v2 not premium
-// * Use LRS redundancy
-// * Obviously you need to enable hierarchical namespace.
-// * Set the default access tier to hot
-// * SFTP, NFS and file shares are not required.
-class AzureHierarchicalNamespaceFileSystemTest : public AzureFileSystemTest {
- Result<AzureOptions> MakeOptions() override {
- AzureOptions options;
- const auto account_key =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY");
- const auto account_name =
std::getenv("AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME");
- if (account_key && account_name) {
- RETURN_NOT_OK(options.ConfigureAccountKeyCredentials(account_name,
account_key));
- return options;
+ void TestCreateDirRecursiveSuccessContainerAndDirectory() {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ if (WithHierarchicalNamespace()) {
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), data.container_name,
FileType::Directory);
+ } else {
+ // There is only virtual directory without hierarchical namespace
+ // support. So the CreateDir() does nothing.
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), data.container_name,
FileType::Directory);
}
- return Status::Cancelled(
- "Connection details not provided for a real hierarchical namespace "
- "account.");
}
-};
-TEST_F(AzureFlatNamespaceFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+ void TestDeleteDirContentsSuccessNonexistent() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+ }
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(true,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+ void TestDeleteDirContentsFailureNonexistent() {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+ }
+};
-TEST_F(AzuriteFileSystemTest, DetectHierarchicalNamespace) {
- auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
- ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_OK_AND_EQ(false,
hierarchical_namespace.Enabled(PreexistingContainerName()));
-}
+void TestAzureFileSystem::TestDetectHierarchicalNamespace() {
+ // Check the environments are implemented and injected here correctly.
+ auto expected = WithHierarchicalNamespace();
-TEST_F(AzuriteFileSystemTest,
DetectHierarchicalNamespaceFailsWithMissingContainer) {
+ auto data = SetUpPreexistingData();
auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
- ASSERT_NOT_OK(hierarchical_namespace.Enabled("nonexistent-container"));
+ ASSERT_OK_AND_EQ(expected,
hierarchical_namespace.Enabled(data.container_name));
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoAccount) {
- AssertFileInfo(fs_.get(), "", FileType::Directory);
-
- // URI
- ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
-}
-
-TEST_F(AzuriteFileSystemTest, GetFileInfoContainer) {
- AssertFileInfo(fs_.get(), PreexistingContainerName(), FileType::Directory);
+void TestAzureFileSystem::TestGetFileInfoObject() {
+ auto data = SetUpPreexistingData();
+ auto object_properties =
+ blob_service_client_->GetBlobContainerClient(data.container_name)
+ .GetBlobClient(data.kObjectName)
+ .GetProperties()
+ .Value;
- AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+ AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File,
+
std::chrono::system_clock::time_point{object_properties.LastModified},
+ static_cast<int64_t>(object_properties.BlobSize));
// URI
- ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
PreexistingContainerName()));
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
std::string{data.kObjectName}));
}
-void AzureFileSystemTest::RunGetFileInfoObjectWithNestedStructureTest() {
+void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() {
+ auto data = SetUpPreexistingData();
// Adds detailed tests to handle cases of different edge cases
// with directory naming conventions (e.g. with and without slashes).
- constexpr auto kObjectName =
"test-object-dir/some_other_dir/another_dir/foo";
- ASSERT_OK_AND_ASSIGN(
- auto output,
- fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName,
/*metadata=*/{}));
- const std::string_view data(kLoremIpsum);
- ASSERT_OK(output->Write(data));
+ const std::string kObjectName =
"test-object-dir/some_other_dir/another_dir/foo";
+ ASSERT_OK_AND_ASSIGN(auto output,
fs_->OpenOutputStream(data.ContainerPath(kObjectName),
+ /*metadata=*/{}));
+ const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum);
+ ASSERT_OK(output->Write(lorem_ipsum));
ASSERT_OK(output->Close());
// 0 is immediately after "/" lexicographically, ensure that this doesn't
// cause unexpected issues.
- ASSERT_OK_AND_ASSIGN(output,
- fs_->OpenOutputStream(
- PreexistingContainerPath() +
"test-object-dir/some_other_dir0",
- /*metadata=*/{}));
- ASSERT_OK(output->Write(data));
- ASSERT_OK(output->Close());
ASSERT_OK_AND_ASSIGN(
- output, fs_->OpenOutputStream(PreexistingContainerPath() + kObjectName +
"0",
+ output,
fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"),
/*metadata=*/{}));
- ASSERT_OK(output->Write(data));
+ ASSERT_OK(output->Write(lorem_ipsum));
+ ASSERT_OK(output->Close());
+ ASSERT_OK_AND_ASSIGN(output,
+ fs_->OpenOutputStream(data.ContainerPath(kObjectName +
"0"),
+ /*metadata=*/{}));
+ ASSERT_OK(output->Write(lorem_ipsum));
ASSERT_OK(output->Close());
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName,
FileType::File);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + kObjectName + "/",
- FileType::NotFound);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir",
- FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-dir/",
+ AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File);
+ AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/",
FileType::NotFound);
+ AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"),
FileType::Directory);
+ AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/",
FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_dir",
+ AssertFileInfo(fs_.get(),
data.ContainerPath("test-object-dir/some_other_dir"),
FileType::Directory);
- AssertFileInfo(fs_.get(),
- PreexistingContainerPath() +
"test-object-dir/some_other_dir/",
+ AssertFileInfo(fs_.get(),
data.ContainerPath("test-object-dir/some_other_dir") + "/",
FileType::Directory);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() + "test-object-di",
- FileType::NotFound);
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-object-dir/some_other_di",
+ AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"),
FileType::NotFound);
+ AssertFileInfo(fs_.get(),
data.ContainerPath("test-object-dir/some_other_di"),
FileType::NotFound);
+
+ if (WithHierarchicalNamespace()) {
+ datalake_service_client_->GetFileSystemClient(data.container_name)
+ .GetDirectoryClient("test-empty-object-dir")
+ .Create();
+
+ AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"),
+ FileType::Directory);
+ }
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoObjectWithNestedStructure) {
- RunGetFileInfoObjectWithNestedStructureTest();
+template <class AzureEnvClass>
+class AzureFileSystemTestImpl : public TestAzureFileSystem {
+ public:
+ using TestAzureFileSystem::TestAzureFileSystem;
+
+ Result<BaseAzureEnv*> GetAzureEnv() const final { return
AzureEnvClass::GetInstance(); }
+};
+
+// How to enable the non-Azurite tests:
+//
+// You need an Azure account. You should be able to create a free account [1].
+// Through the portal Web UI, you should create a storage account [2].
+//
+// A few suggestions on configuration:
+//
+// * Use Standard general-purpose v2 not premium
+// * Use LRS redundancy
+// * Set the default access tier to hot
+// * SFTP, NFS and file shares are not required.
+//
+// You must not enable Hierarchical Namespace on the storage account used for
+// TestAzureFlatNSFileSystem, but you must enable it on the storage account
+// used for TestAzureHierarchicalNSFileSystem.
+//
+// The credentials should be placed in the correct environment variables:
+//
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_NAME
+// * AZURE_FLAT_NAMESPACE_ACCOUNT_KEY
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_NAME
+// * AZURE_HIERARCHICAL_NAMESPACE_ACCOUNT_KEY
+//
+// [1]: https://azure.microsoft.com/en-gb/free/
+// [2]:
+//
https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account
+using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl<AzureFlatNSEnv>;
+using TestAzureHierarchicalNSFileSystem =
AzureFileSystemTestImpl<AzureHierarchicalNSEnv>;
+using TestAzuriteFileSystem = AzureFileSystemTestImpl<AzuriteEnv>;
+
+// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/
HNS)
+
+template <class AzureEnvClass>
+using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl<AzureEnvClass>;
+
+using AllEnvironments =
+ ::testing::Types<AzuriteEnv, AzureFlatNSEnv, AzureHierarchicalNSEnv>;
+
+TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments);
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) {
+ this->TestDetectHierarchicalNamespace();
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
GetFileInfoObjectWithNestedStructure) {
- RunGetFileInfoObjectWithNestedStructureTest();
- datalake_service_client_->GetFileSystemClient(PreexistingContainerName())
- .GetDirectoryClient("test-empty-object-dir")
- .Create();
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) {
+ this->TestGetFileInfoObject();
+}
- AssertFileInfo(fs_.get(), PreexistingContainerPath() +
"test-empty-object-dir",
- FileType::Directory);
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) {
+ this->TestDeleteDirSuccessEmpty();
}
-void AzureFileSystemTest::RunGetFileInfoObjectTest() {
- auto object_properties =
- blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlobClient(PreexistingObjectName())
- .GetProperties()
- .Value;
+TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure)
{
+ this->TestGetFileInfoObjectWithNestedStructure();
+}
- AssertFileInfo(fs_.get(), PreexistingObjectPath(), FileType::File,
-
std::chrono::system_clock::time_point(object_properties.LastModified),
- static_cast<int64_t>(object_properties.BlobSize));
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirSuccessContainerAndDirectory) {
+ this->TestCreateDirSuccessContainerAndDirectory();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessContainerOnly) {
+ this->TestCreateDirRecursiveSuccessContainerOnly();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessDirectoryOnly) {
+ this->TestCreateDirRecursiveSuccessDirectoryOnly();
+}
+
+TYPED_TEST(AzureFileSystemTestOnAllEnvs,
CreateDirRecursiveSuccessContainerAndDirectory) {
+ this->TestCreateDirRecursiveSuccessContainerAndDirectory();
+}
+
+// Tests using a real storage account *with Hierarchical Namespace enabled*
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.RandomDirectoryPath(rng_);
+ ASSERT_RAISES(IOError, fs_->DeleteDir(path));
+}
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
+ const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt");
+ ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
+ ASSERT_OK(output->Write(std::string_view("hello")));
+ ASSERT_OK(output->Close());
+ arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File);
+ ASSERT_OK(fs_->DeleteDir(directory_path));
+ arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
+}
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) {
+ auto data = SetUpPreexistingData();
+ const auto parent = data.RandomDirectoryPath(rng_);
+ const auto path = ConcatAbstractPath(parent, "new-sub");
+ ASSERT_OK(fs_->CreateDir(path, true));
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
+ ASSERT_OK(fs_->DeleteDir(parent));
+ arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
+ arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
+}
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) {
+ auto preexisting_data = SetUpPreexistingData();
+ HierarchicalPaths paths;
+ CreateHierarchicalData(&paths);
+ ASSERT_OK(fs_->DeleteDirContents(paths.directory));
+ arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory);
+ for (const auto& sub_path : paths.sub_paths) {
+ arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound);
+ }
+}
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessNonexistent)
{
+ this->TestDeleteDirContentsSuccessNonexistent();
+}
+
+TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent)
{
+ this->TestDeleteDirContentsFailureNonexistent();
+}
+
+// Tests using Azurite (the local Azure emulator)
+
+TEST_F(TestAzuriteFileSystem,
DetectHierarchicalNamespaceFailsWithMissingContainer) {
+ auto hierarchical_namespace = internal::HierarchicalNamespaceDetector();
+ ASSERT_OK(hierarchical_namespace.Init(datalake_service_client_.get()));
+ ASSERT_RAISES(IOError,
hierarchical_namespace.Enabled("nonexistent-container"));
+}
+
+TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) {
+ AssertFileInfo(fs_.get(), "", FileType::Directory);
// URI
- ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" +
PreexistingObjectName()));
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://"));
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoObject) { RunGetFileInfoObjectTest();
}
+TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) {
+ auto data = SetUpPreexistingData();
+ AssertFileInfo(fs_.get(), data.container_name, FileType::Directory);
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, GetFileInfoObject) {
- RunGetFileInfoObjectTest();
+ AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound);
+
+ // URI
+ ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name));
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) {
+TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) {
SetUpSmallFileSystemTree();
FileSelector select;
@@ -581,11 +840,10 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) {
// Root dir
select.base_dir = "";
ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select));
- ASSERT_EQ(infos.size(), 3);
+ ASSERT_EQ(infos.size(), 2);
ASSERT_EQ(infos, SortedInfos(infos));
AssertFileInfo(infos[0], "container", FileType::Directory);
AssertFileInfo(infos[1], "empty-container", FileType::Directory);
- AssertFileInfo(infos[2], container_name_, FileType::Directory);
// Empty container
select.base_dir = "empty-container";
@@ -641,7 +899,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelector) {
ASSERT_EQ(infos.size(), 4);
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive) {
+TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) {
SetUpSmallFileSystemTree();
FileSelector select;
@@ -651,7 +909,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive)
{
// Root dir
select.base_dir = "";
ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select));
- ASSERT_EQ(infos.size(), 14);
+ ASSERT_EQ(infos.size(), 12);
ASSERT_EQ(infos, SortedInfos(infos));
AssertInfoAllContainersRecursive(infos);
@@ -699,7 +957,7 @@ TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorRecursive)
{
AssertFileInfo(infos[3], "container/otherdir/1/2/3/otherfile",
FileType::File, 10);
}
-TEST_F(AzuriteFileSystemTest, GetFileInfoSelectorExplicitImplicitDirDedup) {
+TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) {
{
auto container = CreateContainer("container");
CreateBlob(container, "mydir/emptydir1/");
@@ -746,137 +1004,60 @@ TEST_F(AzuriteFileSystemTest,
GetFileInfoSelectorExplicitImplicitDirDedup) {
AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile",
FileType::File);
}
-TEST_F(AzuriteFileSystemTest, CreateDirFailureNoContainer) {
+TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) {
ASSERT_RAISES(Invalid, fs_->CreateDir("", false));
}
-TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerOnly) {
- auto container_name = RandomContainerName();
+TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) {
+ auto container_name = PreexistingData::RandomContainerName(rng_);
ASSERT_OK(fs_->CreateDir(container_name, false));
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
}
-TEST_F(AzuriteFileSystemTest, CreateDirSuccessContainerAndDirectory) {
- const auto path = PreexistingContainerPath() + RandomDirectoryName();
- ASSERT_OK(fs_->CreateDir(path, false));
- // There is only virtual directory without hierarchical namespace
- // support. So the CreateDir() does nothing.
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
CreateDirSuccessContainerAndDirectory) {
- const auto path = PreexistingContainerPath() + RandomDirectoryName();
- ASSERT_OK(fs_->CreateDir(path, false));
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
-}
-
-TEST_F(AzuriteFileSystemTest, CreateDirFailureDirectoryWithMissingContainer) {
+TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) {
const auto path = std::string("not-a-container/new-directory");
ASSERT_RAISES(IOError, fs_->CreateDir(path, false));
}
-TEST_F(AzuriteFileSystemTest, CreateDirRecursiveFailureNoContainer) {
+TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) {
ASSERT_RAISES(Invalid, fs_->CreateDir("", true));
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
CreateDirRecursiveSuccessContainerOnly) {
- auto container_name = RandomContainerName();
- ASSERT_OK(fs_->CreateDir(container_name, true));
- arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
-}
-
-TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerOnly) {
- auto container_name = RandomContainerName();
- ASSERT_OK(fs_->CreateDir(container_name, true));
- arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
CreateDirRecursiveSuccessDirectoryOnly) {
- const auto parent = PreexistingContainerPath() + RandomDirectoryName();
- const auto path = internal::ConcatAbstractPath(parent, "new-sub");
- ASSERT_OK(fs_->CreateDir(path, true));
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
-}
-
-TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessDirectoryOnly) {
- const auto parent = PreexistingContainerPath() + RandomDirectoryName();
- const auto path = internal::ConcatAbstractPath(parent, "new-sub");
- ASSERT_OK(fs_->CreateDir(path, true));
- // There is only virtual directory without hierarchical namespace
- // support. So the CreateDir() does nothing.
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
- CreateDirRecursiveSuccessContainerAndDirectory) {
- auto container_name = RandomContainerName();
- const auto parent = internal::ConcatAbstractPath(container_name,
RandomDirectoryName());
- const auto path = internal::ConcatAbstractPath(parent, "new-sub");
- ASSERT_OK(fs_->CreateDir(path, true));
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
- arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
+TEST_F(TestAzuriteFileSystem, CreateDirUri) {
+ ASSERT_RAISES(
+ Invalid,
+ fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_),
true));
}
-TEST_F(AzuriteFileSystemTest, CreateDirRecursiveSuccessContainerAndDirectory) {
- auto container_name = RandomContainerName();
- const auto parent = internal::ConcatAbstractPath(container_name,
RandomDirectoryName());
- const auto path = internal::ConcatAbstractPath(parent, "new-sub");
- ASSERT_OK(fs_->CreateDir(path, true));
- // There is only virtual directory without hierarchical namespace
- // support. So the CreateDir() does nothing.
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
- arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
-}
-
-TEST_F(AzuriteFileSystemTest, CreateDirUri) {
- ASSERT_RAISES(Invalid, fs_->CreateDir("abfs://" + RandomContainerName(),
true));
-}
-
-TEST_F(AzuriteFileSystemTest, DeleteDirSuccessContainer) {
- const auto container_name = RandomContainerName();
+TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) {
+ const auto container_name = PreexistingData::RandomContainerName(rng_);
ASSERT_OK(fs_->CreateDir(container_name));
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory);
ASSERT_OK(fs_->DeleteDir(container_name));
arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound);
}
-TEST_F(AzuriteFileSystemTest, DeleteDirSuccessEmpty) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- // There is only virtual directory without hierarchical namespace
- // support. So the CreateDir() and DeleteDir() do nothing.
- ASSERT_OK(fs_->CreateDir(directory_path));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
- ASSERT_OK(fs_->DeleteDir(directory_path));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
-}
-
-TEST_F(AzuriteFileSystemTest, DeleteDirSuccessNonexistent) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
+TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) {
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
// There is only virtual directory without hierarchical namespace
// support. So the DeleteDir() for nonexistent directory does nothing.
ASSERT_OK(fs_->DeleteDir(directory_path));
arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
}
-TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs) {
+TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) {
#ifdef __APPLE__
GTEST_SKIP() << "This test fails by an Azurite problem: "
"https://github.com/Azure/Azurite/pull/2302";
#endif
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
+ auto data = SetUpPreexistingData();
+ const auto directory_path = data.RandomDirectoryPath(rng_);
// We must use 257 or more blobs here to test pagination of ListBlobs().
// Because we can't add 257 or more delete blob requests to one
SubmitBatch().
int64_t n_blobs = 257;
for (int64_t i = 0; i < n_blobs; ++i) {
- const auto blob_path =
- internal::ConcatAbstractPath(directory_path, std::to_string(i) +
".txt");
+ const auto blob_path = ConcatAbstractPath(directory_path,
std::to_string(i) + ".txt");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
ASSERT_OK(output->Write(std::string_view(std::to_string(i))));
ASSERT_OK(output->Close());
@@ -884,62 +1065,24 @@ TEST_F(AzuriteFileSystemTest, DeleteDirSuccessHaveBlobs)
{
}
ASSERT_OK(fs_->DeleteDir(directory_path));
for (int64_t i = 0; i < n_blobs; ++i) {
- const auto blob_path =
- internal::ConcatAbstractPath(directory_path, std::to_string(i) +
".txt");
+ const auto blob_path = ConcatAbstractPath(directory_path,
std::to_string(i) + ".txt");
arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
}
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessEmpty) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_OK(fs_->CreateDir(directory_path, true));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory);
- ASSERT_OK(fs_->DeleteDir(directory_path));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirFailureNonexistent) {
- const auto path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_RAISES(IOError, fs_->DeleteDir(path));
+TEST_F(TestAzuriteFileSystem, DeleteDirUri) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name +
"/"));
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest, DeleteDirSuccessHaveBlob) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- const auto blob_path = internal::ConcatAbstractPath(directory_path,
"hello.txt");
- ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path));
- ASSERT_OK(output->Write(std::string_view("hello")));
- ASSERT_OK(output->Close());
- arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File);
- ASSERT_OK(fs_->DeleteDir(directory_path));
- arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound);
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
DeleteDirSuccessHaveDirectory) {
- const auto parent =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- const auto path = internal::ConcatAbstractPath(parent, "new-sub");
- ASSERT_OK(fs_->CreateDir(path, true));
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory);
- ASSERT_OK(fs_->DeleteDir(parent));
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound);
- arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound);
-}
-
-TEST_F(AzuriteFileSystemTest, DeleteDirUri) {
- ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" +
PreexistingContainerPath()));
-}
-
-TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessContainer) {
+TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) {
#ifdef __APPLE__
GTEST_SKIP() << "This test fails by an Azurite problem: "
"https://github.com/Azure/Azurite/pull/2302";
#endif
+ auto data = SetUpPreexistingData();
HierarchicalPaths paths;
- CreateHierarchicalData(paths);
+ CreateHierarchicalData(&paths);
ASSERT_OK(fs_->DeleteDirContents(paths.container));
arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory);
arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound);
@@ -948,13 +1091,14 @@ TEST_F(AzuriteFileSystemTest,
DeleteDirContentsSuccessContainer) {
}
}
-TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessDirectory) {
+TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) {
#ifdef __APPLE__
GTEST_SKIP() << "This test fails by an Azurite problem: "
"https://github.com/Azure/Azurite/pull/2302";
#endif
+ auto data = SetUpPreexistingData();
HierarchicalPaths paths;
- CreateHierarchicalData(paths);
+ CreateHierarchicalData(&paths);
ASSERT_OK(fs_->DeleteDirContents(paths.directory));
// GH-38772: We may change this to FileType::Directory.
arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound);
@@ -963,98 +1107,72 @@ TEST_F(AzuriteFileSystemTest,
DeleteDirContentsSuccessDirectory) {
}
}
-TEST_F(AzuriteFileSystemTest, DeleteDirContentsSuccessNonexistent) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
-}
-
-TEST_F(AzuriteFileSystemTest, DeleteDirContentsFailureNonexistent) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
DeleteDirContentsSuccessExist) {
- HierarchicalPaths paths;
- CreateHierarchicalData(paths);
- ASSERT_OK(fs_->DeleteDirContents(paths.directory));
- arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory);
- for (const auto& sub_path : paths.sub_paths) {
- arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound);
- }
-}
-
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
DeleteDirContentsSuccessNonexistent) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_OK(fs_->DeleteDirContents(directory_path, true));
- arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound);
+TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessNonexistent) {
+ this->TestDeleteDirContentsSuccessNonexistent();
}
-TEST_F(AzureHierarchicalNamespaceFileSystemTest,
DeleteDirContentsFailureNonexistent) {
- const auto directory_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
RandomDirectoryName());
- ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false));
+TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) {
+ this->TestDeleteDirContentsFailureNonexistent();
}
-TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationNonexistent) {
- const auto destination_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
"copy-destionation");
- ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), destination_path));
+TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) {
+ auto data = SetUpPreexistingData();
+ const auto destination_path = data.ContainerPath("copy-destionation");
+ ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path));
ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path));
ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info));
ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024));
- EXPECT_EQ(kLoremIpsum, buffer->ToString());
+ EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString());
}
-TEST_F(AzuriteFileSystemTest, CopyFileSuccessDestinationSame) {
- ASSERT_OK(fs_->CopyFile(PreexistingObjectPath(), PreexistingObjectPath()));
- ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath()));
+ ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath()));
ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info));
ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024));
- EXPECT_EQ(kLoremIpsum, buffer->ToString());
+ EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString());
}
-TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationTrailingSlash) {
- ASSERT_RAISES(IOError,
- fs_->CopyFile(PreexistingObjectPath(),
-
internal::EnsureTrailingSlash(PreexistingObjectPath())));
+TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(),
+
internal::EnsureTrailingSlash(data.ObjectPath())));
}
-TEST_F(AzuriteFileSystemTest, CopyFileFailureSourceNonexistent) {
- const auto destination_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
"copy-destionation");
- ASSERT_RAISES(IOError, fs_->CopyFile(NotFoundObjectPath(),
destination_path));
+TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) {
+ auto data = SetUpPreexistingData();
+ const auto destination_path = data.ContainerPath("copy-destionation");
+ ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(),
destination_path));
}
-TEST_F(AzuriteFileSystemTest, CopyFileFailureDestinationParentNonexistent) {
+TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) {
+ auto data = SetUpPreexistingData();
const auto destination_path =
- internal::ConcatAbstractPath(RandomContainerName(), "copy-destionation");
- ASSERT_RAISES(IOError, fs_->CopyFile(PreexistingObjectPath(),
destination_path));
+ ConcatAbstractPath(PreexistingData::RandomContainerName(rng_),
"copy-destionation");
+ ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path));
}
-TEST_F(AzuriteFileSystemTest, CopyFileUri) {
- const auto destination_path =
- internal::ConcatAbstractPath(PreexistingContainerName(),
"copy-destionation");
- ASSERT_RAISES(Invalid,
- fs_->CopyFile("abfs://" + PreexistingObjectPath(),
destination_path));
- ASSERT_RAISES(Invalid,
- fs_->CopyFile(PreexistingObjectPath(), "abfs://" +
destination_path));
+TEST_F(TestAzuriteFileSystem, CopyFileUri) {
+ auto data = SetUpPreexistingData();
+ const auto destination_path = data.ContainerPath("copy-destionation");
+ ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(),
destination_path));
+ ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" +
destination_path));
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamString) {
+TEST_F(TestAzuriteFileSystem, OpenInputStreamString) {
+ auto data = SetUpPreexistingData();
std::shared_ptr<io::InputStream> stream;
- ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath()));
+ ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath()));
ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024));
- EXPECT_EQ(buffer->ToString(), kLoremIpsum);
+ EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum);
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamStringBuffers) {
+TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) {
+ auto data = SetUpPreexistingData();
std::shared_ptr<io::InputStream> stream;
- ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath()));
+ ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath()));
std::string contents;
std::shared_ptr<Buffer> buffer;
@@ -1063,23 +1181,25 @@ TEST_F(AzuriteFileSystemTest,
OpenInputStreamStringBuffers) {
contents.append(buffer->ToString());
} while (buffer && buffer->size() != 0);
- EXPECT_EQ(contents, kLoremIpsum);
+ EXPECT_EQ(contents, PreexistingData::kLoremIpsum);
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamInfo) {
- ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath()));
std::shared_ptr<io::InputStream> stream;
ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info));
ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024));
- EXPECT_EQ(buffer->ToString(), kLoremIpsum);
+ EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum);
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) {
+TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) {
+ auto data = SetUpPreexistingData();
const auto path_to_file = "empty-object.txt";
- const auto path = PreexistingContainerPath() + path_to_file;
- blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
+ const auto path = data.ContainerPath(path_to_file);
+ blob_service_client_->GetBlobContainerClient(data.container_name)
.GetBlockBlobClient(path_to_file)
.UploadFrom(nullptr, 0);
@@ -1090,24 +1210,28 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamEmpty) {
EXPECT_EQ(size, 0);
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamNotFound) {
- ASSERT_RAISES(IOError, fs_->OpenInputStream(NotFoundObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath()));
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamInfoInvalid) {
- ASSERT_OK_AND_ASSIGN(auto info,
fs_->GetFileInfo(PreexistingContainerPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/"));
ASSERT_RAISES(IOError, fs_->OpenInputStream(info));
- ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath()));
+ ASSERT_OK_AND_ASSIGN(auto info2,
fs_->GetFileInfo(data.NotFoundObjectPath()));
ASSERT_RAISES(IOError, fs_->OpenInputStream(info2));
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamUri) {
- ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" +
PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath()));
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamTrailingSlash) {
- ASSERT_RAISES(IOError, fs_->OpenInputStream(PreexistingObjectPath() + '/'));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/'));
}
namespace {
@@ -1145,9 +1269,10 @@ std::shared_ptr<const KeyValueMetadata>
NormalizerKeyValueMetadata(
}
}; // namespace
-TEST_F(AzuriteFileSystemTest, OpenInputStreamReadMetadata) {
+TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) {
+ auto data = SetUpPreexistingData();
std::shared_ptr<io::InputStream> stream;
- ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(PreexistingObjectPath()));
+ ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath()));
std::shared_ptr<const KeyValueMetadata> actual;
ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata());
@@ -1175,8 +1300,9 @@ TEST_F(AzuriteFileSystemTest,
OpenInputStreamReadMetadata) {
NormalizerKeyValueMetadata(actual)->ToString());
}
-TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) {
- ASSERT_OK_AND_ASSIGN(auto stream,
fs_->OpenInputStream(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath()));
ASSERT_OK(stream->Close());
std::array<char, 16> buffer{};
ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data()));
@@ -1184,44 +1310,45 @@ TEST_F(AzuriteFileSystemTest, OpenInputStreamClosed) {
ASSERT_RAISES(Invalid, stream->Tell());
}
-TEST_F(AzuriteFileSystemTest, TestWriteMetadata) {
+TEST_F(TestAzuriteFileSystem, WriteMetadata) {
+ auto data = SetUpPreexistingData();
options_.default_metadata = arrow::key_value_metadata({{"foo", "bar"}});
ASSERT_OK_AND_ASSIGN(auto fs_with_defaults, AzureFileSystem::Make(options_));
- std::string path = "object_with_defaults";
- auto location = PreexistingContainerPath() + path;
+ std::string blob_path = "object_with_defaults";
+ auto full_path = data.ContainerPath(blob_path);
ASSERT_OK_AND_ASSIGN(auto output,
- fs_with_defaults->OpenOutputStream(location,
/*metadata=*/{}));
- const std::string_view expected(kLoremIpsum);
+ fs_with_defaults->OpenOutputStream(full_path,
/*metadata=*/{}));
+ const std::string_view expected(PreexistingData::kLoremIpsum);
ASSERT_OK(output->Write(expected));
ASSERT_OK(output->Close());
// Verify the metadata has been set.
- auto blob_metadata =
- blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlockBlobClient(path)
- .GetProperties()
- .Value.Metadata;
- EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("foo", "bar")},
blob_metadata);
+ auto blob_metadata =
blob_service_client_->GetBlobContainerClient(data.container_name)
+ .GetBlockBlobClient(blob_path)
+ .GetProperties()
+ .Value.Metadata;
+ EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("foo", "bar")},
blob_metadata);
// Check that explicit metadata overrides the defaults.
ASSERT_OK_AND_ASSIGN(
output, fs_with_defaults->OpenOutputStream(
- location, /*metadata=*/arrow::key_value_metadata({{"bar",
"foo"}})));
+ full_path, /*metadata=*/arrow::key_value_metadata({{"bar",
"foo"}})));
ASSERT_OK(output->Write(expected));
ASSERT_OK(output->Close());
- blob_metadata =
blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlockBlobClient(path)
+ blob_metadata =
blob_service_client_->GetBlobContainerClient(data.container_name)
+ .GetBlockBlobClient(blob_path)
.GetProperties()
.Value.Metadata;
// Defaults are overwritten and not merged.
- EXPECT_EQ(Azure::Core::CaseInsensitiveMap{std::make_pair("bar", "foo")},
blob_metadata);
+ EXPECT_EQ(Core::CaseInsensitiveMap{std::make_pair("bar", "foo")},
blob_metadata);
}
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) {
- const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("test-write-object");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
- const std::string_view expected(kLoremIpsum);
+ const std::string_view expected(PreexistingData::kLoremIpsum);
ASSERT_OK(output->Write(expected));
ASSERT_OK(output->Close());
@@ -1234,8 +1361,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamSmall) {
EXPECT_EQ(expected, std::string_view(inbuf.data(), size));
}
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) {
- const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("test-write-object");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
std::array<std::int64_t, 3> sizes{257 * 1024, 258 * 1024, 259 * 1024};
std::array<std::string, 3> buffers{
@@ -1265,8 +1393,9 @@ TEST_F(AzuriteFileSystemTest, OpenOutputStreamLarge) {
EXPECT_EQ(contents, buffers[0] + buffers[1] + buffers[2]);
}
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamTruncatesExistingFile) {
- const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("test-write-object");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
const std::string_view expected0("Existing blob content");
ASSERT_OK(output->Write(expected0));
@@ -1281,7 +1410,7 @@ TEST_F(AzuriteFileSystemTest,
OpenOutputStreamTruncatesExistingFile) {
EXPECT_EQ(expected0, std::string_view(inbuf.data(), size));
ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {}));
- const std::string_view expected1(kLoremIpsum);
+ const std::string_view expected1(PreexistingData::kLoremIpsum);
ASSERT_OK(output->Write(expected1));
ASSERT_OK(output->Close());
@@ -1291,8 +1420,9 @@ TEST_F(AzuriteFileSystemTest,
OpenOutputStreamTruncatesExistingFile) {
EXPECT_EQ(expected1, std::string_view(inbuf.data(), size));
}
-TEST_F(AzuriteFileSystemTest, OpenAppendStreamDoesNotTruncateExistingFile) {
- const auto path = PreexistingContainerPath() + "test-write-object";
+TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("test-write-object");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
const std::string_view expected0("Existing blob content");
ASSERT_OK(output->Write(expected0));
@@ -1307,7 +1437,7 @@ TEST_F(AzuriteFileSystemTest,
OpenAppendStreamDoesNotTruncateExistingFile) {
EXPECT_EQ(expected0, std::string_view(inbuf.data()));
ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {}));
- const std::string_view expected1(kLoremIpsum);
+ const std::string_view expected1(PreexistingData::kLoremIpsum);
ASSERT_OK(output->Write(expected1));
ASSERT_OK(output->Close());
@@ -1319,35 +1449,37 @@ TEST_F(AzuriteFileSystemTest,
OpenAppendStreamDoesNotTruncateExistingFile) {
std::string(expected0) + std::string(expected1));
}
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamClosed) {
- const auto path = internal::ConcatAbstractPath(PreexistingContainerName(),
-
"open-output-stream-closed.txt");
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("open-output-stream-closed.txt");
ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {}));
ASSERT_OK(output->Close());
- ASSERT_RAISES(Invalid, output->Write(kLoremIpsum, std::strlen(kLoremIpsum)));
+ ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum,
+
std::strlen(PreexistingData::kLoremIpsum)));
ASSERT_RAISES(Invalid, output->Flush());
ASSERT_RAISES(Invalid, output->Tell());
}
-TEST_F(AzuriteFileSystemTest, OpenOutputStreamUri) {
- const auto path = internal::ConcatAbstractPath(PreexistingContainerName(),
- "open-output-stream-uri.txt");
+TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) {
+ auto data = SetUpPreexistingData();
+ const auto path = data.ContainerPath("open-output-stream-uri.txt");
ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path));
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileMixedReadVsReadAt) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) {
+ auto data = SetUpPreexistingData();
// Create a file large enough to make the random access tests non-trivial.
auto constexpr kLineWidth = 100;
auto constexpr kLineCount = 4096;
std::vector<std::string> lines(kLineCount);
int lineno = 0;
- std::generate_n(lines.begin(), lines.size(),
- [&] { return RandomLine(++lineno, kLineWidth); });
+ std::generate_n(lines.begin(), lines.size(), [&] {
+ return PreexistingData::RandomLine(++lineno, kLineWidth, rng_);
+ });
- const auto path_to_file = "OpenInputFileMixedReadVsReadAt/object-name";
- const auto path = PreexistingContainerPath() + path_to_file;
+ const auto path =
data.ContainerPath("OpenInputFileMixedReadVsReadAt/object-name");
- UploadLines(lines, path_to_file, kLineCount * kLineWidth);
+ UploadLines(lines, path, kLineCount * kLineWidth);
std::shared_ptr<io::RandomAccessFile> file;
ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path));
@@ -1368,7 +1500,7 @@ TEST_F(AzuriteFileSystemTest,
OpenInputFileMixedReadVsReadAt) {
}
// Verify random reads interleave too.
- auto const index = RandomIndex(kLineCount);
+ auto const index = PreexistingData::RandomIndex(kLineCount, rng_);
auto const position = index * kLineWidth;
ASSERT_OK_AND_ASSIGN(size, file->ReadAt(position, buffer.size(),
buffer.data()));
EXPECT_EQ(size, kLineWidth);
@@ -1381,27 +1513,28 @@ TEST_F(AzuriteFileSystemTest,
OpenInputFileMixedReadVsReadAt) {
}
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) {
+ auto data = SetUpPreexistingData();
// Create a file large enough to make the random access tests non-trivial.
auto constexpr kLineWidth = 100;
auto constexpr kLineCount = 4096;
std::vector<std::string> lines(kLineCount);
int lineno = 0;
- std::generate_n(lines.begin(), lines.size(),
- [&] { return RandomLine(++lineno, kLineWidth); });
+ std::generate_n(lines.begin(), lines.size(), [&] {
+ return PreexistingData::RandomLine(++lineno, kLineWidth, rng_);
+ });
- const auto path_to_file = "OpenInputFileRandomSeek/object-name";
- const auto path = PreexistingContainerPath() + path_to_file;
+ const auto path = data.ContainerPath("OpenInputFileRandomSeek/object-name");
std::shared_ptr<io::OutputStream> output;
- UploadLines(lines, path_to_file, kLineCount * kLineWidth);
+ UploadLines(lines, path, kLineCount * kLineWidth);
std::shared_ptr<io::RandomAccessFile> file;
ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path));
for (int i = 0; i != 32; ++i) {
SCOPED_TRACE("Iteration " + std::to_string(i));
// Verify sequential reads work as expected.
- auto const index = RandomIndex(kLineCount);
+ auto const index = PreexistingData::RandomIndex(kLineCount, rng_);
auto const position = index * kLineWidth;
ASSERT_OK(file->Seek(position));
ASSERT_OK_AND_ASSIGN(auto actual, file->Read(kLineWidth));
@@ -1409,15 +1542,15 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileRandomSeek) {
}
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) {
+TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) {
+ auto data = SetUpPreexistingData();
// Create a test file.
- const auto path_to_file = "OpenInputFileIoContext/object-name";
- const auto path = PreexistingContainerPath() + path_to_file;
+ const auto blob_path = "OpenInputFileIoContext/object-name";
+ const auto path = data.ContainerPath(blob_path);
const std::string contents = "The quick brown fox jumps over the lazy dog";
- auto blob_client =
- blob_service_client_->GetBlobContainerClient(PreexistingContainerName())
- .GetBlockBlobClient(path_to_file);
+ auto blob_client =
blob_service_client_->GetBlobContainerClient(data.container_name)
+ .GetBlockBlobClient(blob_path);
blob_client.UploadFrom(reinterpret_cast<const uint8_t*>(contents.data()),
contents.length());
@@ -1426,8 +1559,9 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileIoContext) {
EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id());
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) {
- ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath()));
std::shared_ptr<io::RandomAccessFile> file;
ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info));
@@ -1437,24 +1571,27 @@ TEST_F(AzuriteFileSystemTest, OpenInputFileInfo) {
auto constexpr kStart = 16;
ASSERT_OK_AND_ASSIGN(size, file->ReadAt(kStart, buffer.size(),
buffer.data()));
- auto const expected = std::string(kLoremIpsum).substr(kStart);
+ auto const expected =
std::string(PreexistingData::kLoremIpsum).substr(kStart);
EXPECT_EQ(std::string(buffer.data(), size), expected);
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileNotFound) {
- ASSERT_RAISES(IOError, fs_->OpenInputFile(NotFoundObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) {
+ auto data = SetUpPreexistingData();
+ ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath()));
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileInfoInvalid) {
- ASSERT_OK_AND_ASSIGN(auto info,
fs_->GetFileInfo(PreexistingContainerPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name));
ASSERT_RAISES(IOError, fs_->OpenInputFile(info));
- ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(NotFoundObjectPath()));
+ ASSERT_OK_AND_ASSIGN(auto info2,
fs_->GetFileInfo(data.NotFoundObjectPath()));
ASSERT_RAISES(IOError, fs_->OpenInputFile(info2));
}
-TEST_F(AzuriteFileSystemTest, OpenInputFileClosed) {
- ASSERT_OK_AND_ASSIGN(auto stream,
fs_->OpenInputFile(PreexistingObjectPath()));
+TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) {
+ auto data = SetUpPreexistingData();
+ ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath()));
ASSERT_OK(stream->Close());
std::array<char, 16> buffer{};
ASSERT_RAISES(Invalid, stream->Tell());