felipecrv commented on code in PR #38269:
URL: https://github.com/apache/arrow/pull/38269#discussion_r1363966231
##########
cpp/src/arrow/filesystem/azurefs.cc:
##########
@@ -37,34 +43,331 @@ bool AzureOptions::Equals(const AzureOptions& other) const
{
credentials_kind == other.credentials_kind);
}
+Status AzureOptions::ConfigureAccountKeyCredentials(const std::string&
account_name,
+ const std::string&
account_key) {
+ if (this->backend == AzureBackend::Azurite) {
+ account_blob_url = "http://127.0.0.1:10000/" + account_name + "/";
+ account_dfs_url = "http://127.0.0.1:10000/" + account_name + "/";
+ } else {
+ account_dfs_url = "https://" + account_name + ".dfs.core.windows.net/";
+ account_blob_url = "https://" + account_name + ".blob.core.windows.net/";
+ }
+ storage_credentials_provider =
+
std::make_shared<Azure::Storage::StorageSharedKeyCredential>(account_name,
+
account_key);
+ credentials_kind = AzureCredentialsKind::StorageCredentials;
+ return Status::OK();
+}
+namespace {
+
+// An AzureFileSystem represents a single Azure storage account. AzurePath
describes a
+// container and path within that storage account.
+struct AzurePath {
+ std::string full_path;
+ std::string container;
+ std::string path_to_file;
+ std::vector<std::string> path_to_file_parts;
+
+ static Result<AzurePath> FromString(const std::string& s) {
+ // Example expected string format: testcontainer/testdir/testfile.txt
+ // container = testcontainer
+ // path_to_file = testdir/testfile.txt
+ // path_to_file_parts = [testdir, testfile.txt]
+ if (internal::IsLikelyUri(s)) {
+ return Status::Invalid(
+ "Expected an Azure object path of the form 'container/path...', got
a URI: '",
+ s, "'");
+ }
+ auto src = internal::RemoveTrailingSlash(s);
+ auto input_path = std::string(src.data());
+ src = internal::RemoveLeadingSlash(src);
+ auto first_sep = src.find_first_of(internal::kSep);
+ if (first_sep == 0) {
+ return Status::Invalid("Path cannot start with a separator ('",
input_path, "')");
+ }
+ if (first_sep == std::string::npos) {
+ return AzurePath{std::string(src), std::string(src), "", {}};
+ }
+ AzurePath path;
+ path.full_path = std::string(src);
+ path.container = std::string(src.substr(0, first_sep));
+ path.path_to_file = std::string(src.substr(first_sep + 1));
+ path.path_to_file_parts = internal::SplitAbstractPath(path.path_to_file);
+ RETURN_NOT_OK(Validate(path));
+ return path;
+ }
+
+ static Status Validate(const AzurePath& path) {
+ auto status = internal::ValidateAbstractPathParts(path.path_to_file_parts);
+ if (!status.ok()) {
+ return Status::Invalid(status.message(), " in path ", path.full_path);
+ } else {
+ return status;
+ }
+ }
+
+ AzurePath parent() const {
+ DCHECK(has_parent());
+ auto parent = AzurePath{"", container, "", path_to_file_parts};
+ parent.path_to_file_parts.pop_back();
+ parent.path_to_file =
internal::JoinAbstractPath(parent.path_to_file_parts);
+ if (parent.path_to_file.empty()) {
+ parent.full_path = parent.container;
+ } else {
+ parent.full_path = parent.container + internal::kSep +
parent.path_to_file;
+ }
+ return parent;
+ }
+
+ bool has_parent() const { return !path_to_file.empty(); }
+
+ bool empty() const { return container.empty() && path_to_file.empty(); }
+
+ bool operator==(const AzurePath& other) const {
+ return container == other.container && path_to_file == other.path_to_file;
+ }
+};
+
+Status PathNotFound(const AzurePath& path) {
+ return ::arrow::fs::internal::PathNotFound(path.full_path);
+}
+
+Status NotAFile(const AzurePath& path) {
+ return ::arrow::fs::internal::NotAFile(path.full_path);
+}
+
+Status ValidateFilePath(const AzurePath& path) {
+ if (path.container.empty()) {
+ return PathNotFound(path);
+ }
+
+ if (path.path_to_file.empty()) {
+ return NotAFile(path);
+ }
+ return Status::OK();
+}
+
+Status ErrorToStatus(const std::string& prefix,
+ const Azure::Storage::StorageException& exception) {
+ return Status::IOError(prefix, " Azure Error: ", exception.what());
+}
+
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult&
result) {
+ auto md = std::make_shared<KeyValueMetadata>();
+ for (auto prop : result) {
+ md->Append(prop.first, prop.second);
+ }
+ return md;
+}
+
+class ObjectInputFile final : public io::RandomAccessFile {
+ public:
+ ObjectInputFile(std::shared_ptr<Azure::Storage::Blobs::BlobClient>&
blob_client,
+ const io::IOContext& io_context, const AzurePath& path,
+ int64_t size = kNoSize)
+ : blob_client_(std::move(blob_client)),
+ io_context_(io_context),
+ path_(path),
+ content_length_(size) {}
+
+ Status Init() {
+ if (content_length_ != kNoSize) {
+ DCHECK_GE(content_length_, 0);
+ return Status::OK();
+ }
+ try {
+ auto properties = blob_client_->GetProperties();
+ content_length_ = properties.Value.BlobSize;
+ metadata_ = GetObjectMetadata(properties.Value.Metadata);
+ return Status::OK();
+ } catch (const Azure::Storage::StorageException& exception) {
+ if (exception.StatusCode == Azure::Core::Http::HttpStatusCode::NotFound)
{
+ // Could be either container or blob not found.
+ return PathNotFound(path_);
+ }
+ return ErrorToStatus(
+ "When fetching properties for '" + blob_client_->GetUrl() + "': ",
exception);
+ }
+ }
+
+ Status CheckClosed() const {
+ if (closed_) {
+ return Status::Invalid("Operation on closed stream");
+ }
+ return Status::OK();
+ }
+
+ Status CheckPosition(int64_t position, const char* action) const {
+ if (position < 0) {
+ return Status::Invalid("Cannot ", action, " from negative position");
+ }
+ if (position > content_length_) {
+ return Status::IOError("Cannot ", action, " past end of file");
Review Comment:
`DCHECK_GE(content_length_, 0);` is good. My suggestions comes from the
observation that if one were to call this before `Init()` is called
`content_length_` can't be trusted because it can potentially be set to
`kNoSize` by the constructor.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]