This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b34f9dfb40 ARROW-16864: [Python] Allow omitting S3 external_id and
session_name with role_arn (#13455)
b34f9dfb40 is described below
commit b34f9dfb40d9b6282fc207cd9983da0cd1abb2d4
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jun 29 10:11:45 2022 +0200
ARROW-16864: [Python] Allow omitting S3 external_id and session_name with
role_arn (#13455)
Both external_id and session_name are optional parameters when an Assume
Role authentication is requested, but omitting them would raise an error.
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/filesystem/s3fs.cc | 16 ++++++++++++----
cpp/src/arrow/filesystem/s3fs.h | 2 +-
python/pyarrow/_s3fs.pyx | 4 ++++
python/pyarrow/tests/test_fs.py | 16 +++++++++++++---
4 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 70b66938fe..7a3df54350 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -381,13 +381,21 @@ Result<S3Options> S3Options::FromUri(const std::string& uri_string,
 }
 
 bool S3Options::Equals(const S3Options& other) const {
+  const int64_t default_metadata_size = default_metadata ? default_metadata->size() : 0;
+  const bool default_metadata_equals =
+      default_metadata_size
+          ? (other.default_metadata && other.default_metadata->Equals(*default_metadata))
+          : (!other.default_metadata || other.default_metadata->size() == 0);
   return (region == other.region && endpoint_override == other.endpoint_override &&
-          scheme == other.scheme && background_writes == other.background_writes &&
+          scheme == other.scheme && role_arn == other.role_arn &&
+          session_name == other.session_name && external_id == other.external_id &&
+          load_frequency == other.load_frequency &&
+          proxy_options.Equals(other.proxy_options) &&
+          credentials_kind == other.credentials_kind &&
+          background_writes == other.background_writes &&
           allow_bucket_creation == other.allow_bucket_creation &&
           allow_bucket_deletion == other.allow_bucket_deletion &&
-          credentials_kind == other.credentials_kind &&
-          proxy_options.Equals(other.proxy_options) &&
-          GetAccessKey() == other.GetAccessKey() &&
+          default_metadata_equals && GetAccessKey() == other.GetAccessKey() &&
           GetSecretKey() == other.GetSecretKey() &&
           GetSessionToken() == other.GetSessionToken());
 }
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index fa1fdb9671..05fa404162 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -116,7 +116,7 @@ struct ARROW_EXPORT S3Options {
   /// Optional external idenitifer to pass to STS when assuming a role
   std::string external_id;
   /// Frequency (in seconds) to refresh temporary credentials from assumed role
-  int load_frequency;
+  int load_frequency = 900;
 
   /// If connection is through a proxy, set options here
   S3ProxyOptions proxy_options;
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index f128136de1..fe22612059 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -234,6 +234,10 @@ cdef class S3FileSystem(FileSystem):
             options = CS3Options.Anonymous()
         elif role_arn:
+            if session_name is None:
+                session_name = ''
+            if external_id is None:
+                external_id = ''
             options = CS3Options.FromAssumeRole(
                 tobytes(role_arn),
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 1e43579a2b..41c242ff83 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1107,16 +1107,26 @@ def test_s3_options():
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs2 = S3FileSystem(role_arn='role')
+    assert isinstance(fs2, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs2)) == fs2
+    assert fs2 != fs
+
     fs = S3FileSystem(anonymous=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
-    fs = S3FileSystem(background_writes=True,
-                      default_metadata={"ACL": "authenticated-read",
-                                        "Content-Type": "text/plain"})
+    fs = S3FileSystem(background_writes=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs
 
+    fs2 = S3FileSystem(background_writes=True,
+                       default_metadata={"ACL": "authenticated-read",
+                                         "Content-Type": "text/plain"})
+    assert isinstance(fs2, S3FileSystem)
+    assert pickle.loads(pickle.dumps(fs2)) == fs2
+    assert fs2 != fs
+
     fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True)
     assert isinstance(fs, S3FileSystem)
     assert pickle.loads(pickle.dumps(fs)) == fs