kou commented on code in PR #38375:
URL: https://github.com/apache/arrow/pull/38375#discussion_r1369446133
##########
python/pyarrow/fs.py:
##########
@@ -57,10 +57,6 @@
finalize_s3, initialize_s3, resolve_s3_region)
except ImportError:
_not_imported.append("S3FileSystem")
-else:
- ensure_s3_initialized()
- import atexit
- atexit.register(finalize_s3)
Review Comment:
How about using `arrow::EnsureS3Finalized()` instead?
```diff
diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx
index ab4517136..21c3cc94d 100644
--- a/python/pyarrow/_s3fs.pyx
+++ b/python/pyarrow/_s3fs.pyx
@@ -70,6 +70,13 @@ def finalize_s3():
check_status(CFinalizeS3())
+def ensure_s3_finalized():
+ """
+ Finalize S3 if already initialized
+ """
+ check_status(CEnsureS3Finalized())
+
+
def resolve_s3_region(bucket):
"""
Resolve the S3 region of a bucket.
diff --git a/python/pyarrow/fs.py b/python/pyarrow/fs.py
index 36655c7d1..4cb723f53 100644
--- a/python/pyarrow/fs.py
+++ b/python/pyarrow/fs.py
@@ -54,13 +54,12 @@ try:
from pyarrow._s3fs import ( # noqa
AwsDefaultS3RetryStrategy, AwsStandardS3RetryStrategy,
S3FileSystem, S3LogLevel, S3RetryStrategy, ensure_s3_initialized,
- finalize_s3, initialize_s3, resolve_s3_region)
+ finalize_s3, ensure_s3_finalized, initialize_s3, resolve_s3_region)
except ImportError:
_not_imported.append("S3FileSystem")
else:
- ensure_s3_initialized()
import atexit
- atexit.register(finalize_s3)
+ atexit.register(ensure_s3_finalized)
def __getattr__(name):
diff --git a/python/pyarrow/includes/libarrow_fs.pxd
b/python/pyarrow/includes/libarrow_fs.pxd
index 2727fc201..cb30f4e75 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -211,6 +211,7 @@ cdef extern from "arrow/filesystem/api.h" namespace
"arrow::fs" nogil:
const CS3GlobalOptions& options)
cdef CStatus CEnsureS3Initialized "arrow::fs::EnsureS3Initialized"()
cdef CStatus CFinalizeS3 "arrow::fs::FinalizeS3"()
+ cdef CStatus CEnsureS3Finalized "arrow::fs::EnsureS3Finalized"()
cdef CResult[c_string] ResolveS3BucketRegion(const c_string& bucket)
```
##########
python/pyarrow/_s3fs.pyx:
##########
@@ -260,6 +271,26 @@ cdef class S3FileSystem(FileSystem):
load_frequency=900, proxy_options=None,
allow_bucket_creation=False, allow_bucket_deletion=False,
retry_strategy: S3RetryStrategy =
AwsStandardS3RetryStrategy(max_attempts=3)):
+ ensure_s3_initialized()
+
+ self._initialize_s3(access_key=access_key, secret_key=secret_key,
session_token=session_token,
+ anonymous=anonymous, region=region,
request_timeout=request_timeout,
+ connect_timeout=connect_timeout, scheme=scheme,
endpoint_override=endpoint_override,
+ background_writes=background_writes,
default_metadata=default_metadata,
+ role_arn=role_arn, session_name=session_name,
external_id=external_id,
+ load_frequency=load_frequency,
proxy_options=proxy_options,
+ allow_bucket_creation=allow_bucket_creation,
allow_bucket_deletion=allow_bucket_deletion,
+ retry_strategy=retry_strategy)
+
+ def _initialize_s3(self, *, access_key=None, secret_key=None,
session_token=None,
+ bint anonymous=False, region=None, request_timeout=None,
+ connect_timeout=None, scheme=None,
endpoint_override=None,
+ bint background_writes=True, default_metadata=None,
+ role_arn=None, session_name=None, external_id=None,
+ load_frequency=900, proxy_options=None,
+ allow_bucket_creation=False,
allow_bucket_deletion=False,
+ retry_strategy: S3RetryStrategy =
AwsStandardS3RetryStrategy(max_attempts=3)):
+
Review Comment:
Do we need this?
It seems that `__init__()` already calls `ensure_s3_initialized()`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]