This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b43c6f6b18 ARROW-17079: [C++] Improve error messages for AWS S3 calls 
(#13979)
b43c6f6b18 is described below

commit b43c6f6b18acee019a500bc6a05af18c07f2ca79
Author: Philipp Moritz <[email protected]>
AuthorDate: Mon Aug 29 20:46:34 2022 -0700

    ARROW-17079: [C++] Improve error messages for AWS S3 calls (#13979)
    
    First part of improving error messages from S3 operations. We include the 
operation that failed in the error message now. This makes it easier to debug 
problems (e.g. bucket permissions or other infrastructure hickups) because it 
allows to re-run the operation that failed with the AWS CLI for further 
diagnosing what is going wrong.
    
    Example (changes in bold):
    
    > When getting information for key 'test.csv' in bucket 
'pcmoritz-test-bucket-arrow-errors': AWS Error [code 15] **during HeadObject 
operation**: No response body.
    
    Authored-by: Philipp Moritz <[email protected]>
    Signed-off-by: Philipp Moritz <[email protected]>
---
 cpp/src/arrow/filesystem/s3_internal.h     | 46 +++++++++++-----------------
 cpp/src/arrow/filesystem/s3fs.cc           | 48 +++++++++++++++---------------
 cpp/src/arrow/filesystem/s3fs_benchmark.cc |  6 ++--
 cpp/src/arrow/filesystem/s3fs_test.cc      | 14 ++++-----
 4 files changed, 52 insertions(+), 62 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3_internal.h 
b/cpp/src/arrow/filesystem/s3_internal.h
index ceb92b5548..ae938c1760 100644
--- a/cpp/src/arrow/filesystem/s3_internal.h
+++ b/cpp/src/arrow/filesystem/s3_internal.h
@@ -39,19 +39,6 @@ namespace arrow {
 namespace fs {
 namespace internal {
 
-#define ARROW_AWS_ASSIGN_OR_RAISE_IMPL(outcome_name, lhs, rexpr) \
-  auto outcome_name = (rexpr);                                   \
-  if (!outcome_name.IsSuccess()) {                               \
-    return ErrorToStatus(outcome_name.GetError());               \
-  }                                                              \
-  lhs = std::move(outcome_name).GetResultWithOwnership();
-
-#define ARROW_AWS_ASSIGN_OR_RAISE_NAME(x, y) ARROW_CONCAT(x, y)
-
-#define ARROW_AWS_ASSIGN_OR_RAISE(lhs, rexpr) \
-  ARROW_AWS_ASSIGN_OR_RAISE_IMPL(             \
-      ARROW_AWS_ASSIGN_OR_RAISE_NAME(_aws_error_or_value, __COUNTER__), lhs, 
rexpr);
-
 // XXX Should we expose this at some point?
 enum class S3Backend { Amazon, Minio, Other };
 
@@ -104,60 +91,63 @@ inline bool IsAlreadyExists(const 
Aws::Client::AWSError<Aws::S3::S3Errors>& erro
 // TODO qualify error messages with a prefix indicating context
 // (e.g. "When completing multipart upload to bucket 'xxx', key 'xxx': ...")
 template <typename ErrorType>
-Status ErrorToStatus(const std::string& prefix,
+Status ErrorToStatus(const std::string& prefix, const std::string& operation,
                      const Aws::Client::AWSError<ErrorType>& error) {
   // XXX Handle fine-grained error types
   // See
   // 
https://sdk.amazonaws.com/cpp/api/LATEST/namespace_aws_1_1_s3.html#ae3f82f8132b619b6e91c88a9f1bde371
   return Status::IOError(prefix, "AWS Error [code ",
-                         static_cast<int>(error.GetErrorType()),
-                         "]: ", error.GetMessage());
+                         static_cast<int>(error.GetErrorType()), "] during ", 
operation,
+                         " operation: ", error.GetMessage());
 }
 
 template <typename ErrorType, typename... Args>
-Status ErrorToStatus(const std::tuple<Args&...>& prefix,
+Status ErrorToStatus(const std::tuple<Args&...>& prefix, const std::string& 
operation,
                      const Aws::Client::AWSError<ErrorType>& error) {
   std::stringstream ss;
   ::arrow::internal::PrintTuple(&ss, prefix);
-  return ErrorToStatus(ss.str(), error);
+  return ErrorToStatus(ss.str(), operation, error);
 }
 
 template <typename ErrorType>
-Status ErrorToStatus(const Aws::Client::AWSError<ErrorType>& error) {
-  return ErrorToStatus(std::string(), error);
+Status ErrorToStatus(const std::string& operation,
+                     const Aws::Client::AWSError<ErrorType>& error) {
+  return ErrorToStatus(std::string(), operation, error);
 }
 
 template <typename AwsResult, typename Error>
-Status OutcomeToStatus(const std::string& prefix,
+Status OutcomeToStatus(const std::string& prefix, const std::string& operation,
                        const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
   if (outcome.IsSuccess()) {
     return Status::OK();
   } else {
-    return ErrorToStatus(prefix, outcome.GetError());
+    return ErrorToStatus(prefix, operation, outcome.GetError());
   }
 }
 
 template <typename AwsResult, typename Error, typename... Args>
-Status OutcomeToStatus(const std::tuple<Args&...>& prefix,
+Status OutcomeToStatus(const std::tuple<Args&...>& prefix, const std::string& 
operation,
                        const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
   if (outcome.IsSuccess()) {
     return Status::OK();
   } else {
-    return ErrorToStatus(prefix, outcome.GetError());
+    return ErrorToStatus(prefix, operation, outcome.GetError());
   }
 }
 
 template <typename AwsResult, typename Error>
-Status OutcomeToStatus(const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
-  return OutcomeToStatus(std::string(), outcome);
+Status OutcomeToStatus(const std::string& operation,
+                       const Aws::Utils::Outcome<AwsResult, Error>& outcome) {
+  return OutcomeToStatus(std::string(), operation, outcome);
 }
 
 template <typename AwsResult, typename Error>
-Result<AwsResult> OutcomeToResult(Aws::Utils::Outcome<AwsResult, Error> 
outcome) {
+Result<AwsResult> OutcomeToResult(const std::string& operation,
+                                  Aws::Utils::Outcome<AwsResult, Error> 
outcome) {
   if (outcome.IsSuccess()) {
     return std::move(outcome).GetResultWithOwnership();
   } else {
-    return ErrorToStatus(outcome.GetError());
+    return ErrorToStatus(operation, outcome.GetError());
   }
 }
 
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index fb933e4d4d..878f54812b 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -629,7 +629,7 @@ class S3Client : public Aws::S3::S3Client {
       } else if (!outcome.IsSuccess()) {
         return ErrorToStatus(std::forward_as_tuple("When resolving region for 
bucket '",
                                                    request.GetBucket(), "': "),
-                             outcome.GetError());
+                             "HeadBucket", outcome.GetError());
       } else {
         return Status::IOError("When resolving region for bucket '", 
request.GetBucket(),
                                "': missing 'x-amz-bucket-region' header in 
response");
@@ -945,7 +945,7 @@ Result<S3Model::GetObjectResult> 
GetObjectRange(Aws::S3::S3Client* client,
   req.SetKey(ToAwsString(path.key));
   req.SetRange(ToAwsString(FormatRange(start, length)));
   req.SetResponseStreamFactory(AwsWriteableStreamFactory(out, length));
-  return OutcomeToResult(client->GetObject(req));
+  return OutcomeToResult("GetObject", client->GetObject(req));
 }
 
 template <typename ObjectResult>
@@ -1076,7 +1076,7 @@ class ObjectInputFile final : public io::RandomAccessFile 
{
         return ErrorToStatus(
             std::forward_as_tuple("When reading information for key '", 
path_.key,
                                   "' in bucket '", path_.bucket, "': "),
-            outcome.GetError());
+            "HeadObject", outcome.GetError());
       }
     }
     content_length_ = outcome.GetResult().GetContentLength();
@@ -1250,7 +1250,7 @@ class ObjectOutputStream final : public io::OutputStream {
       return ErrorToStatus(
           std::forward_as_tuple("When initiating multiple part upload for key 
'",
                                 path_.key, "' in bucket '", path_.bucket, "': 
"),
-          outcome.GetError());
+          "CreateMultipartUpload", outcome.GetError());
     }
     upload_id_ = outcome.GetResult().GetUploadId();
     upload_state_ = std::make_shared<UploadState>();
@@ -1273,7 +1273,7 @@ class ObjectOutputStream final : public io::OutputStream {
       return ErrorToStatus(
           std::forward_as_tuple("When aborting multiple part upload for key 
'", path_.key,
                                 "' in bucket '", path_.bucket, "': "),
-          outcome.GetError());
+          "AbortMultipartUpload", outcome.GetError());
     }
     current_part_.reset();
     client_ = nullptr;
@@ -1321,7 +1321,7 @@ class ObjectOutputStream final : public io::OutputStream {
         return ErrorToStatus(
             std::forward_as_tuple("When completing multiple part upload for 
key '",
                                   path_.key, "' in bucket '", path_.bucket, 
"': "),
-            outcome.GetError());
+            "CompleteMultipartUpload", outcome.GetError());
       }
 
       client_ = nullptr;
@@ -1514,7 +1514,7 @@ class ObjectOutputStream final : public io::OutputStream {
     return ErrorToStatus(
         std::forward_as_tuple("When uploading part for key '", req.GetKey(),
                               "' in bucket '", req.GetBucket(), "': "),
-        outcome.GetError());
+        "UploadPart", outcome.GetError());
   }
 
  protected:
@@ -1744,7 +1744,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
       if (!IsNotFound(outcome.GetError())) {
         return ErrorToStatus(std::forward_as_tuple(
                                  "When testing for existence of bucket '", 
bucket, "': "),
-                             outcome.GetError());
+                             "HeadBucket", outcome.GetError());
       }
       return false;
     }
@@ -1763,7 +1763,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
         return Status::OK();
       } else if (!IsNotFound(outcome.GetError())) {
         return ErrorToStatus(
-            std::forward_as_tuple("When creating bucket '", bucket, "': "),
+            std::forward_as_tuple("When creating bucket '", bucket, "': "), 
"HeadBucket",
             outcome.GetError());
       }
 
@@ -1790,7 +1790,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     auto outcome = client_->CreateBucket(req);
     if (!outcome.IsSuccess() && !IsAlreadyExists(outcome.GetError())) {
       return ErrorToStatus(std::forward_as_tuple("When creating bucket '", 
bucket, "': "),
-                           outcome.GetError());
+                           "CreateBucket", outcome.GetError());
     }
     return Status::OK();
   }
@@ -1803,7 +1803,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     req.SetBody(std::make_shared<std::stringstream>(""));
     return OutcomeToStatus(
         std::forward_as_tuple("When creating key '", key, "' in bucket '", 
bucket, "': "),
-        client_->PutObject(req));
+        "PutObject", client_->PutObject(req));
   }
 
   Status CreateEmptyDir(const std::string& bucket, const std::string& key) {
@@ -1817,7 +1817,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     req.SetKey(ToAwsString(key));
     return OutcomeToStatus(
         std::forward_as_tuple("When delete key '", key, "' in bucket '", 
bucket, "': "),
-        client_->DeleteObject(req));
+        "DeleteObject", client_->DeleteObject(req));
   }
 
   Status CopyObject(const S3Path& src_path, const S3Path& dest_path) {
@@ -1831,7 +1831,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
         std::forward_as_tuple("When copying key '", src_path.key, "' in bucket 
'",
                               src_path.bucket, "' to key '", dest_path.key,
                               "' in bucket '", dest_path.bucket, "': "),
-        client_->CopyObject(req));
+        "CopyObject", client_->CopyObject(req));
   }
 
   // On Minio, an empty "directory" doesn't satisfy the same API requests as
@@ -1885,7 +1885,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     }
     return ErrorToStatus(std::forward_as_tuple("When reading information for 
key '", key,
                                                "' in bucket '", bucket, "': "),
-                         outcome.GetError());
+                         "HeadObject", outcome.GetError());
   }
 
   Result<bool> IsEmptyDirectory(
@@ -1911,7 +1911,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     return ErrorToStatus(
         std::forward_as_tuple("When listing objects under key '", path.key,
                               "' in bucket '", path.bucket, "': "),
-        outcome.GetError());
+        "ListObjectsV2", outcome.GetError());
   }
 
   Status CheckNestingDepth(int32_t nesting_depth) {
@@ -1991,7 +1991,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
       }
       return ErrorToStatus(std::forward_as_tuple("When listing objects under 
key '", key,
                                                  "' in bucket '", bucket, "': 
"),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto handle_recursion = [&](int32_t nesting_depth) -> Result<bool> {
@@ -2029,7 +2029,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
       }
       return ErrorToStatus(std::forward_as_tuple("When listing objects under 
key '", key,
                                                  "' in bucket '", bucket, "': 
"),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto handle_recursion = [producer, select,
@@ -2091,7 +2091,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
     auto handle_error = [=](const AWSError<S3Errors>& error) -> Status {
       return ErrorToStatus(std::forward_as_tuple("When listing objects under 
key '", key,
                                                  "' in bucket '", bucket, "': 
"),
-                           error);
+                           "ListObjectsV2", error);
     };
 
     auto self = shared_from_this();
@@ -2113,7 +2113,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
 
       Status operator()(const S3Model::DeleteObjectsOutcome& outcome) {
         if (!outcome.IsSuccess()) {
-          return ErrorToStatus(outcome.GetError());
+          return ErrorToStatus("DeleteObjects", outcome.GetError());
         }
         // Also need to check per-key errors, even on successful outcome
         // See
@@ -2201,7 +2201,7 @@ class S3FileSystem::Impl : public 
std::enable_shared_from_this<S3FileSystem::Imp
   static Result<std::vector<std::string>> ProcessListBuckets(
       const Aws::S3::Model::ListBucketsOutcome& outcome) {
     if (!outcome.IsSuccess()) {
-      return ErrorToStatus(std::forward_as_tuple("When listing buckets: "),
+      return ErrorToStatus(std::forward_as_tuple("When listing buckets: "), 
"ListBuckets",
                            outcome.GetError());
     }
     std::vector<std::string> buckets;
@@ -2308,7 +2308,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const 
std::string& s) {
         return ErrorToStatus(
             std::forward_as_tuple("When getting information for bucket '", 
path.bucket,
                                   "': "),
-            outcome.GetError());
+            "HeadBucket", outcome.GetError());
       }
       info.set_type(FileType::NotFound);
       return info;
@@ -2333,7 +2333,7 @@ Result<FileInfo> S3FileSystem::GetFileInfo(const 
std::string& s) {
       return ErrorToStatus(
           std::forward_as_tuple("When getting information for key '", path.key,
                                 "' in bucket '", path.bucket, "': "),
-          outcome.GetError());
+          "HeadObject", outcome.GetError());
     }
     // Not found => perhaps it's an empty "directory"
     ARROW_ASSIGN_OR_RAISE(bool is_dir, impl_->IsEmptyDirectory(path, 
&outcome));
@@ -2478,7 +2478,7 @@ Status S3FileSystem::DeleteDir(const std::string& s) {
     req.SetBucket(ToAwsString(path.bucket));
     return OutcomeToStatus(
         std::forward_as_tuple("When deleting bucket '", path.bucket, "': "),
-        impl_->client_->DeleteBucket(req));
+        "DeleteBucket", impl_->client_->DeleteBucket(req));
   } else if (path.key.empty()) {
     return Status::IOError("Would delete bucket '", path.bucket, "'. ",
                            "To delete buckets, enable the 
allow_bucket_deletion option.");
@@ -2536,7 +2536,7 @@ Status S3FileSystem::DeleteFile(const std::string& s) {
       return ErrorToStatus(
           std::forward_as_tuple("When getting information for key '", path.key,
                                 "' in bucket '", path.bucket, "': "),
-          outcome.GetError());
+          "HeadObject", outcome.GetError());
     }
   }
   // Object found, delete it
diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc 
b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
index c2922b4c28..2121642963 100644
--- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc
+++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc
@@ -125,14 +125,14 @@ class MinioFixture : public benchmark::Fixture {
   Status MakeBucket() {
     Aws::S3::Model::HeadBucketRequest head;
     head.SetBucket(ToAwsString(bucket_));
-    const Status st = OutcomeToStatus(client_->HeadBucket(head));
+    const Status st = OutcomeToStatus("HeadBucket", client_->HeadBucket(head));
     if (st.ok()) {
       // Bucket exists already
       return st;
     }
     Aws::S3::Model::CreateBucketRequest req;
     req.SetBucket(ToAwsString(bucket_));
-    return OutcomeToStatus(client_->CreateBucket(req));
+    return OutcomeToStatus("CreateBucket", client_->CreateBucket(req));
   }
 
   /// Make an object with dummy data.
@@ -141,7 +141,7 @@ class MinioFixture : public benchmark::Fixture {
     req.SetBucket(ToAwsString(bucket_));
     req.SetKey(ToAwsString(name));
     req.SetBody(std::make_shared<std::stringstream>(std::string(size, 'a')));
-    return OutcomeToStatus(client_->PutObject(req));
+    return OutcomeToStatus("PutObject", client_->PutObject(req));
   }
 
   /// Make an object with Parquet data.
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc 
b/cpp/src/arrow/filesystem/s3fs_test.cc
index 1d89e2da71..69ad5306a0 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -413,28 +413,28 @@ class TestS3FS : public S3TestMixin {
     {
       Aws::S3::Model::CreateBucketRequest req;
       req.SetBucket(ToAwsString("bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
       req.SetBucket(ToAwsString("empty-bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
     {
       Aws::S3::Model::PutObjectRequest req;
       req.SetBucket(ToAwsString("bucket"));
       req.SetKey(ToAwsString("emptydir/"));
       req.SetBody(std::make_shared<std::stringstream>(""));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       // NOTE: no need to create intermediate "directories" somedir/ and
       // somedir/subdir/
       req.SetKey(ToAwsString("somedir/subdir/subfile"));
       req.SetBody(std::make_shared<std::stringstream>("sub data"));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       req.SetKey(ToAwsString("somefile"));
       req.SetBody(std::make_shared<std::stringstream>("some data"));
       req.SetContentType("x-arrow/test");
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
       req.SetKey(ToAwsString("otherdir/1/2/3/otherfile"));
       req.SetBody(std::make_shared<std::stringstream>("other data"));
-      ASSERT_OK(OutcomeToStatus(client_->PutObject(req)));
+      ASSERT_OK(OutcomeToStatus("PutObject", client_->PutObject(req)));
     }
   }
 
@@ -1203,7 +1203,7 @@ class TestS3FSGeneric : public S3TestMixin, public 
GenericFileSystemTest {
     {
       Aws::S3::Model::CreateBucketRequest req;
       req.SetBucket(ToAwsString("s3fs-test-bucket"));
-      ASSERT_OK(OutcomeToStatus(client_->CreateBucket(req)));
+      ASSERT_OK(OutcomeToStatus("CreateBucket", client_->CreateBucket(req)));
     }
 
     options_.ConfigureAccessKey(minio_->access_key(), minio_->secret_key());

Reply via email to