This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3d45ac9653 GH-39779: [Python] Expose force_virtual_addressing in
PyArrow (#39819)
3d45ac9653 is described below
commit 3d45ac96534fc76b820b488aa02182e6b93a388f
Author: y.yoshida5 <[email protected]>
AuthorDate: Thu Feb 1 22:36:59 2024 +0900
GH-39779: [Python] Expose force_virtual_addressing in PyArrow (#39819)
### Rationale for this change / What changes are included in this PR?
This change exposes the force_virtual_addressing S3 option in PyArrow.
### Are these changes tested?
Existing unit tests are not broken, and a new test case has been added.
### Are there any user-facing changes?
pyarrow.fs.S3FileSystem: it becomes possible to specify the argument
'force_virtual_addressing'.
* Closes: #39779
Authored-by: yo1956 <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/_s3fs.pyx | 11 ++++++++++-
python/pyarrow/includes/libarrow_fs.pxd | 1 +
python/pyarrow/tests/test_fs.py | 4 ++++
3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index 13b8c748cb..f5bab99a49 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -245,6 +245,11 @@ cdef class S3FileSystem(FileSystem):
retry_strategy : S3RetryStrategy, default
AwsStandardS3RetryStrategy(max_attempts=3)
The retry strategy to use with S3; fail after max_attempts. Available
strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy.
+ force_virtual_addressing : bool, default False
+ Whether to use virtual addressing of buckets.
+ If true, then virtual addressing is always enabled.
+ If false, then virtual addressing is only enabled if
`endpoint_override` is empty.
+ This can be used for non-AWS backends that only support virtual
hosted-style access.
Examples
--------
@@ -268,7 +273,9 @@ cdef class S3FileSystem(FileSystem):
role_arn=None, session_name=None, external_id=None,
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False,
- retry_strategy: S3RetryStrategy =
AwsStandardS3RetryStrategy(max_attempts=3)):
+ retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy(
+ max_attempts=3),
+ force_virtual_addressing=False):
cdef:
optional[CS3Options] options
shared_ptr[CS3FileSystem] wrapped
@@ -380,6 +387,7 @@ cdef class S3FileSystem(FileSystem):
options.value().allow_bucket_creation = allow_bucket_creation
options.value().allow_bucket_deletion = allow_bucket_deletion
+ options.value().force_virtual_addressing = force_virtual_addressing
if isinstance(retry_strategy, AwsStandardS3RetryStrategy):
options.value().retry_strategy =
CS3RetryStrategy.GetAwsStandardRetryStrategy(
@@ -447,6 +455,7 @@ cdef class S3FileSystem(FileSystem):
opts.proxy_options.username),
'password': frombytes(
opts.proxy_options.password)},
+ force_virtual_addressing=opts.force_virtual_addressing,
),)
)
diff --git a/python/pyarrow/includes/libarrow_fs.pxd
b/python/pyarrow/includes/libarrow_fs.pxd
index cb30f4e750..7876fb0f96 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -167,6 +167,7 @@ cdef extern from "arrow/filesystem/api.h" namespace
"arrow::fs" nogil:
c_bool background_writes
c_bool allow_bucket_creation
c_bool allow_bucket_deletion
+ c_bool force_virtual_addressing
shared_ptr[const CKeyValueMetadata] default_metadata
c_string role_arn
c_string session_name
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index ab10addfc3..6ba5137e4f 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -1186,6 +1186,10 @@ def test_s3_options(pickle_module):
assert pickle_module.loads(pickle_module.dumps(fs2)) == fs2
assert fs2 != fs
+ fs = S3FileSystem(endpoint_override='localhost:8999',
force_virtual_addressing=True)
+ assert isinstance(fs, S3FileSystem)
+ assert pickle_module.loads(pickle_module.dumps(fs)) == fs
+
with pytest.raises(ValueError):
S3FileSystem(access_key='access')
with pytest.raises(ValueError):