felipecrv commented on code in PR #38269:
URL: https://github.com/apache/arrow/pull/38269#discussion_r1363988548
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,330 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
+ src = internal::RemoveLeadingSlash(src);
+ auto first_sep = src.find_first_of(internal::kSep);
+ if (first_sep == 0) {
+ return Status::Invalid("Path cannot start with a separator ('",
input_path, "')");
Review Comment:
I would pass `s` instead of `input_path` here since `s` is what the caller
provided.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,330 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
Review Comment:
`src` is a `string_view` (a pointer and a length). When you use only
`src.data()` (a `const char*`) to create a `std::string`, the constructor
copies every character until it finds a `'\0'`. In general that terminator is
not even guaranteed to be present in a `std::string_view`, but here it will be,
since its pointer comes from a `std::string` (always zero-terminated to honor C
tradition). The problem is that the `'\0'` comes after the potential trailing
slash that `RemoveTrailingSlash` "removes" by creating a `string_view` with a
smaller length than `s.size()` when `s` contains a trailing slash, so
`input_path` ends up still containing that trailing slash.
To build a `std::string` from a `std::string_view` you can write
`std::string input_path{src.data(), src.size()};` or just `std::string
input_path{src};`.
This conversion isn't made implicit because it allocates a new buffer for
the `std::string` being created. Conversion from `std::string` to
`std::string_view` is implicit though -- because it's free.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,330 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
Review Comment:
I'm writing these long comments to help you improve your C++ knowledge. It's
a very confusing language, so don't feel discouraged by these small mistakes.
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,330 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
Review Comment:
I think you don't even need the `input_path` variable.
##########
cpp/src/arrow/filesystem/azurefs_test.cc:
##########
@@ -113,33 +123,6 @@ AzuriteEnv* GetAzuriteEnv() {
// Placeholder tests
// TODO: GH-18014 Remove once a proper test is added
-TEST(AzureFileSystem, UploadThenDownload) {
Review Comment:
Shouldn't the comment above (`// Placeholder tests` / `// TODO: GH-18014 ...`)
be removed as well?
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,330 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
+ src = internal::RemoveLeadingSlash(src);
+ auto first_sep = src.find_first_of(internal::kSep);
+ if (first_sep == 0) {
+ return Status::Invalid("Path cannot start with a separator ('",
input_path, "')");
+ }
+ if (first_sep == std::string::npos) {
+ return AzurePath{std::string(src), std::string(src), "", {}};
+ }
+ AzurePath path;
+ path.full_path = std::string(src);
+ path.container = std::string(src.substr(0, first_sep));
+ path.path_to_file = std::string(src.substr(first_sep + 1));
+ path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+ RETURN_NOT_OK(Validate(path));
+ return path;
+ }
+
+ static Status Validate(const AzurePath& path) {
+ auto status = internal::ValidateAbstractPathParts(path.path_to_file_parts);
+ if (!status.ok()) {
+ return Status::Invalid(status.message(), " in path ", path.full_path);
+ } else {
+ return status;
+ }
+ }
+
+ AzurePath parent() const {
+ DCHECK(has_parent());
+ auto parent = AzurePath{"", container, "", path_to_file_parts};
+ parent.path_to_file_parts.pop_back();
+ parent.path_to_file =
internal::JoinAbstractPath(parent.path_to_file_parts);
+ if (parent.path_to_file.empty()) {
+ parent.full_path = parent.container;
+ } else {
+ parent.full_path = parent.container + internal::kSep +
parent.path_to_file;
+ }
+ return parent;
+ }
+
+ bool has_parent() const { return !path_to_file.empty(); }
+
+ bool empty() const { return container.empty() && path_to_file.empty(); }
+
+ bool operator==(const AzurePath& other) const {
+ return container == other.container && path_to_file == other.path_to_file;
+ }
+};
+
+Status PathNotFound(const AzurePath& path) {
+ return ::arrow::fs::internal::PathNotFound(path.full_path);
+}
+
+Status NotAFile(const AzurePath& path) {
+ return ::arrow::fs::internal::NotAFile(path.full_path);
+}
+
+Status ValidateFilePath(const AzurePath& path) {
+ if (path.container.empty()) {
+ return PathNotFound(path);
+ }
+
+ if (path.path_to_file.empty()) {
+ return NotAFile(path);
+ }
+ return Status::OK();
+}
Review Comment:
Maybe these checks could be moved to `Validate`. I know they are redundant
when called from the constructor but the two emptiness checks are very cheap.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]