bkietz commented on a change in pull request #10295:
URL: https://github.com/apache/arrow/pull/10295#discussion_r640771854



##########
File path: cpp/src/arrow/filesystem/s3fs.cc
##########
@@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> 
GetObjectRange(Aws::S3::S3Client* client,
   return OutcomeToResult(client->GetObject(req));
 }
 
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& 
result) {
+  std::vector<std::string> keys, values;
+
+  auto push = [&](std::string k, const Aws::String& v) {
+    if (!v.empty()) {
+      keys.push_back(std::move(k));
+      values.push_back(FromAwsString(v).to_string());
+    }
+  };
+  auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) {
+    if (v != Aws::Utils::DateTime(0.0)) {

Review comment:
       I suppose this is unlikely to be a valid "Last-Modified" or "Expires" 
but it still looks odd

##########
File path: cpp/src/arrow/filesystem/test_util.h
##########
@@ -140,6 +140,8 @@ class ARROW_TESTING_EXPORT GenericFileSystemTest {
   virtual bool have_directory_mtimes() const { return true; }
   // - Whether some directory tree deletion tests may fail randomly
   virtual bool have_flaky_directory_tree_deletion() const { return false; }
+  // - Whether the filesystem stores some metadata along files

Review comment:
       ```suggestion
     // - Whether the filesystem stores some metadata alongside files
   ```

##########
File path: cpp/src/arrow/filesystem/filesystem.h
##########
@@ -283,13 +283,17 @@ class ARROW_EXPORT FileSystem : public 
std::enable_shared_from_this<FileSystem>
   ///
   /// If the target already exists, existing data is truncated.
   virtual Result<std::shared_ptr<io::OutputStream>> OpenOutputStream(
-      const std::string& path) = 0;
+      const std::string& path,
+      const std::shared_ptr<const KeyValueMetadata>& metadata) = 0;

Review comment:
       Nit: the produced stream will always take ownership of `metadata`, so 
this shouldn't be a ref
   ```suggestion
         std::shared_ptr<const KeyValueMetadata> metadata) = 0;
   ```

##########
File path: cpp/src/arrow/filesystem/s3fs.cc
##########
@@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> 
GetObjectRange(Aws::S3::S3Client* client,
   return OutcomeToResult(client->GetObject(req));
 }
 
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& 
result) {
+  std::vector<std::string> keys, values;
+
+  auto push = [&](std::string k, const Aws::String& v) {
+    if (!v.empty()) {
+      keys.push_back(std::move(k));
+      values.push_back(FromAwsString(v).to_string());
+    }
+  };
+  auto push_datetime = [&](std::string k, const Aws::Utils::DateTime& v) {
+    if (v != Aws::Utils::DateTime(0.0)) {
+      push(std::move(k), v.ToGmtString(Aws::Utils::DateFormat::ISO_8601));
+    }
+  };
+
+  keys.push_back("Content-Length");
+  values.push_back(std::to_string(result.GetContentLength()));
+
+  push("Cache-Control", result.GetCacheControl());
+  push("Content-Type", result.GetContentType());
+  push("Content-Language", result.GetContentLanguage());
+  push("ETag", result.GetETag());
+  push("VersionId", result.GetVersionId());
+  push_datetime("Last-Modified", result.GetLastModified());
+  push_datetime("Expires", result.GetExpires());
+  return std::make_shared<const KeyValueMetadata>(std::move(keys), 
std::move(values));
+}
+
+template <typename ObjectRequest>
+struct ObjectMetadataSetter {
+  ObjectRequest* req;
+

Review comment:
       ```suggestion
   ```

##########
File path: cpp/src/arrow/filesystem/s3fs.cc
##########
@@ -684,6 +685,85 @@ Result<S3Model::GetObjectResult> 
GetObjectRange(Aws::S3::S3Client* client,
   return OutcomeToResult(client->GetObject(req));
 }
 
+template <typename ObjectResult>
+std::shared_ptr<const KeyValueMetadata> GetObjectMetadata(const ObjectResult& 
result) {
+  std::vector<std::string> keys, values;
+
+  auto push = [&](std::string k, const Aws::String& v) {
+    if (!v.empty()) {
+      keys.push_back(std::move(k));
+      values.push_back(FromAwsString(v).to_string());
+    }
+  };

Review comment:
       You can create a mutable KeyValueMetadata and return it from this 
function (`shared_ptr<T>` implicitly converts to `shared_ptr<const T>`)
   ```suggestion
     std::shared_ptr<KeyValueMetadata> metadata;
   
     auto push = [&](std::string k, const Aws::String& v) {
       if (!v.empty()) {
         metadata->Append(std::move(k), FromAwsString(v).to_string());
       }
     };
   ```

##########
File path: cpp/src/arrow/io/interfaces.cc
##########
@@ -105,6 +107,22 @@ Result<util::string_view> InputStream::Peek(int64_t 
ARROW_ARG_UNUSED(nbytes)) {
 
 bool InputStream::supports_zero_copy() const { return false; }
 
+Result<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadata() {
+  return std::shared_ptr<const KeyValueMetadata>{};
+}
+
+// Default ReadMetadataAsync() implementation: simply issue the read on the 
context's
+// executor
+Future<std::shared_ptr<const KeyValueMetadata>> InputStream::ReadMetadataAsync(
+    const IOContext& ctx) {
+  auto self = shared_from_this();
+  return DeferNotOk(internal::SubmitIO(ctx, [self] { return 
self->ReadMetadata(); }));

Review comment:
       Could you clarify ownership of `ctx`? SubmitIO makes a copy but doesn't 
seem to need it. If the copy is intentional, please unref the `ctx` arg




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to