This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b34f9dfb40 ARROW-16864: [Python] Allow omitting S3 external_id and session_name with role_arn (#13455)
b34f9dfb40 is described below

commit b34f9dfb40d9b6282fc207cd9983da0cd1abb2d4
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jun 29 10:11:45 2022 +0200

    ARROW-16864: [Python] Allow omitting S3 external_id and session_name with role_arn (#13455)
    
    Both external_id and session_name are optional parameters when an Assume Role authentication is requested, but omitting them would raise an error.
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/filesystem/s3fs.cc | 16 ++++++++++++----
 cpp/src/arrow/filesystem/s3fs.h  |  2 +-
 python/pyarrow/_s3fs.pyx         |  4 ++++
 python/pyarrow/tests/test_fs.py  | 16 +++++++++++++---
 4 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 70b66938fe..7a3df54350 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -381,13 +381,21 @@ Result<S3Options> S3Options::FromUri(const std::string& uri_string,
 }
 
 bool S3Options::Equals(const S3Options& other) const {
+  const int64_t default_metadata_size = default_metadata ? default_metadata->size() : 0;
+  const bool default_metadata_equals =
+      default_metadata_size
+          ? (other.default_metadata && other.default_metadata->Equals(*default_metadata))
+          : (!other.default_metadata || other.default_metadata->size() == 0);
   return (region == other.region && endpoint_override == other.endpoint_override &&
-          scheme == other.scheme && background_writes == other.background_writes &&
+          scheme == other.scheme && role_arn == other.role_arn &&
+          session_name == other.session_name && external_id == other.external_id &&
+          load_frequency == other.load_frequency &&
+          proxy_options.Equals(other.proxy_options) &&
+          credentials_kind == other.credentials_kind &&
+          background_writes == other.background_writes &&
           allow_bucket_creation == other.allow_bucket_creation &&
           allow_bucket_deletion == other.allow_bucket_deletion &&
-          credentials_kind == other.credentials_kind &&
-          proxy_options.Equals(other.proxy_options) &&
-          GetAccessKey() == other.GetAccessKey() &&
+          default_metadata_equals && GetAccessKey() == other.GetAccessKey() &&
           GetSecretKey() == other.GetSecretKey() &&
           GetSessionToken() == other.GetSessionToken());
 }
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index fa1fdb9671..05fa404162 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -116,7 +116,7 @@ struct ARROW_EXPORT S3Options {
   /// Optional external idenitifer to pass to STS when assuming a role
   std::string external_id;
   /// Frequency (in seconds) to refresh temporary credentials from assumed role
-  int load_frequency;
+  int load_frequency = 900;
 
   /// If connection is through a proxy, set options here
   S3ProxyOptions proxy_options;
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index f128136de1..fe22612059 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -234,6 +234,10 @@ cdef class S3FileSystem(FileSystem):
 
             options = CS3Options.Anonymous()
         elif role_arn:
+            if session_name is None:
+                session_name = ''
+            if external_id is None:
+                external_id = ''
 
             options = CS3Options.FromAssumeRole(
                 tobytes(role_arn),
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 1e43579a2b..41c242ff83 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1107,16 +1107,26 @@ def test_s3_options():
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs2 = S3FileSystem(role_arn='role')
+    assert isinstance(fs2, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs2)) == fs2
+    assert fs2 != fs
+
     fs = S3FileSystem(anonymous=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
-    fs = S3FileSystem(background_writes=True,
-                      default_metadata={"ACL": "authenticated-read",
-                                        "Content-Type": "text/plain"})
+    fs = S3FileSystem(background_writes=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs2 = S3FileSystem(background_writes=True,
+                       default_metadata={"ACL": "authenticated-read",
+                                         "Content-Type": "text/plain"})
+    assert isinstance(fs2, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs2)) == fs2
+    assert fs2 != fs
+
     fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs

Reply via email to