bkietz commented on a change in pull request #10295: URL: https://github.com/apache/arrow/pull/10295#discussion_r640771854
########## File path: cpp/src/arrow/filesystem/s3fs.cc ########## @@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> GetObjectRange(Aws::S3::S3Client* client, return OutcomeToResult(client->GetObject(req)); } +template <typename ObjectResult> +std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& result) { + std::vector<std::string> keys, values; + + auto push = [&](std::string k, const Aws::String& v) { + if (!v.empty()) { + keys.push_back(std::move(k)); + values.push_back(FromAwsString(v).to_string()); + } + }; + auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) { + if (v != Aws::Utils::DateTime(0.0)) { Review comment: I suppose this is unlikely to be a valid "Last-Modified" or "Expires" but it still looks odd ########## File path: cpp/src/arrow/filesystem/test_util.h ########## @@ -140,6 +140,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest { virtual bool have_directory_mtimes() const { return true; } // - Whether some directory tree deletion tests may fail randomly virtual bool have_flaky_directory_tree_deletion() const { return false; } + // - Whether the filesystem stores some metadata along files Review comment: ```suggestion // - Whether the filesystem stores some metadata alongside files ``` ########## File path: cpp/src/arrow/filesystem/filesystem.h ########## @@ -283,13 +283,17 @@ class ARROW_EXPORT FileSystem : public std::enable_shared_from_this<FileSystem> /// /// If the target already exists, existing data is truncated. virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream( - const std::string& path) = 0; + const std::string& path, + const std::shared_ptr<const KeyValueMetadata>& metadata) = 0; Review comment: Nit: the produced stream will always take ownership of `metadata`, so this shouldn't be a ref ```suggestion std::shared_ptr<const KeyValueMetadata> metadata) = 0; ``` ########## File path: cpp/src/arrow/filesystem/s3fs.cc ########## @@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> GetObjectRange(Aws::S3::S3Client* client, return OutcomeToResult(client->GetObject(req)); } +template <typename ObjectResult> +std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& result) { + std::vector<std::string> keys, values; + + auto push = [&](std::string k, const Aws::String& v) { + if (!v.empty()) { + keys.push_back(std::move(k)); + values.push_back(FromAwsString(v).to_string()); + } + }; + auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) { + if (v != Aws::Utils::DateTime(0.0)) { + push(std::move(k), v.ToGmtString(Aws::Utils::DateFormat::ISO_8601)); + } + }; + + keys.push_back("Content-Length"); + values.push_back(std::to_string(result.GetContentLength())); + + push("Cache-Control", result.GetCacheControl()); + push("Content-Type", result.GetContentType()); + push("Content-Language", result.GetContentLanguage()); + push("ETag", result.GetETag()); + push("VersionId", result.GetVersionId()); + push_datetime("Last-Modified", result.GetLastModified()); + push_datetime("Expires", result.GetExpires()); + return std::make_shared<const KeyValueMetadata>(std::move(keys), std::move(values)); +} + +template <typename ObjectRequest> +struct ObjectMetadataSetter { + ObjectRequest* req; + Review comment: ```suggestion ``` ########## File path: cpp/src/arrow/filesystem/s3fs.cc ########## @@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> GetObjectRange(Aws::S3::S3Client* client, return OutcomeToResult(client->GetObject(req)); } +template <typename ObjectResult> +std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& result) { + std::vector<std::string> keys, values; + + auto push = [&](std::string k, const Aws::String& v) { + if (!v.empty()) { + keys.push_back(std::move(k)); + values.push_back(FromAwsString(v).to_string()); + } + }; Review comment: You can create a mutable KeyValueMetadata and return it from this function (`shared_ptr<T>` implicitly converts to `shared_ptr<const T>`) ```suggestion std::shared_ptr<KeyValueMetadata> metadata; auto push = [&](std::string k, const Aws::String& v) { if (!v.empty()) { metadata->Append(std::move(k), FromAwsString(v).to_string()); } }; ``` ########## File path: cpp/src/arrow/io/interfaces.cc ########## @@ -105,6 +107,22 @@ Result<util::string_view> InputStream::Peek(int64_t ARROW_ARG_UNUSED(nbytes)) { bool InputStream::supports_zero_copy() const { return false; } +Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() { + return std::shared_ptr<const KeyValueMetadata>{}; +} + +// Default ReadMetadataAsync() implementation: simply issue the read on the context's +// executor +Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync( + const IOContext& ctx) { + auto self = shared_from_this(); + return DeferNotOk(internal::SubmitIO(ctx, [self] { return self->ReadMetadata(); })); Review comment: Could you clarify ownership of `ctx`? SubmitIO makes a copy but doesn't seem to need it. If the copy is intentional, please unref the `ctx` arg -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org