This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0124d5b5b7 GH-49078: [FS][Azure] Fix lossy pickling of
`SubTreeFileSystem(base_path, AzureFileSystem(...))` (#49140)
0124d5b5b7 is described below
commit 0124d5b5b7f60b6ae6312bbb962dcff6dd4bc641
Author: Thomas Newton <[email protected]>
AuthorDate: Mon Mar 2 10:28:25 2026 +0000
GH-49078: [FS][Azure] Fix lossy pickling of `SubTreeFileSystem(base_path,
AzureFileSystem(...))` (#49140)
### Rationale for this change
Fix https://github.com/apache/arrow/issues/49078
### What changes are included in this PR?
- Implement getters on the C++ side of `AzureOptions` for the values that
are currently stored only on the Python side.
- This required adding some more member variables
- I decided to add `ClearCredentials`, so that `AzureOptions` can't get into
strange states by configuring one credential type and then another. IMO
configuring the credentials during initialisation of the `AzureOptions` would
be neater, but I don't want to make this PR too big.
- Update the C++ side `AzureOptions::Equals`
- Remove the Python-side `self` attributes and instead depend on the getters
from the C++ side.
### Are these changes tested?
- Updated tests on the C++ side for the updated `Equals` and newly added
getter methods.
- Added a fixture `pickle_with_and_without_subtree_filesystem`, which can
be used in place of the `pickle_module`. This adds test combinations with and
without wrapping the filesystem in a `SubTreeFileSystem` before pickling it.
### Are there any user-facing changes?
Only that pickling `SubTreeFileSystem(base_path, AzureFileSystem(...))` now
works properly.
* GitHub Issue: #49078
Authored-by: Thomas Newton <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
cpp/src/arrow/filesystem/azurefs.cc | 45 +++++++++++++---
cpp/src/arrow/filesystem/azurefs.h | 15 ++++--
cpp/src/arrow/filesystem/azurefs_test.cc | 92 ++++++++++++++++++++++++++++++--
python/pyarrow/_azurefs.pyx | 41 ++++++--------
python/pyarrow/includes/libarrow_fs.pxd | 5 ++
python/pyarrow/tests/test_fs.py | 47 +++++++++++++---
6 files changed, 194 insertions(+), 51 deletions(-)
diff --git a/cpp/src/arrow/filesystem/azurefs.cc
b/cpp/src/arrow/filesystem/azurefs.cc
index 7aa3e58c1d..20b0a655d8 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -248,13 +248,17 @@ Result<AzureOptions> AzureOptions::FromUri(const
std::string& uri_string,
}
bool AzureOptions::Equals(const AzureOptions& other) const {
- const bool equals = blob_storage_authority == other.blob_storage_authority &&
- dfs_storage_authority == other.dfs_storage_authority &&
- blob_storage_scheme == other.blob_storage_scheme &&
- dfs_storage_scheme == other.dfs_storage_scheme &&
- default_metadata == other.default_metadata &&
- account_name == other.account_name &&
- credential_kind_ == other.credential_kind_;
+ const bool equals =
+ account_name == other.account_name &&
+ blob_storage_authority == other.blob_storage_authority &&
+ dfs_storage_authority == other.dfs_storage_authority &&
+ blob_storage_scheme == other.blob_storage_scheme &&
+ dfs_storage_scheme == other.dfs_storage_scheme &&
+ default_metadata == other.default_metadata &&
+ background_writes == other.background_writes &&
+ credential_kind_ == other.credential_kind_ && account_key_ ==
other.account_key_ &&
+ sas_token_ == other.sas_token_ && tenant_id_ == other.tenant_id_ &&
+ client_id_ == other.client_id_ && client_secret_ == other.client_secret_;
if (!equals) {
return false;
}
@@ -318,39 +322,59 @@ std::string AzureOptions::AccountDfsUrl(const
std::string& account_name) const {
return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name);
}
+void AzureOptions::ClearCredentials() {
+ credential_kind_ = CredentialKind::kDefault;
+ storage_shared_key_credential_ = nullptr;
+ account_key_.clear();
+ sas_token_.clear();
+ tenant_id_.clear();
+ client_id_.clear();
+ client_secret_.clear();
+ token_credential_ = nullptr;
+}
+
Status AzureOptions::ConfigureDefaultCredential() {
+ ClearCredentials();
credential_kind_ = CredentialKind::kDefault;
token_credential_ =
std::make_shared<Azure::Identity::DefaultAzureCredential>();
return Status::OK();
}
Status AzureOptions::ConfigureAnonymousCredential() {
+ ClearCredentials();
credential_kind_ = CredentialKind::kAnonymous;
return Status::OK();
}
Status AzureOptions::ConfigureAccountKeyCredential(const std::string&
account_key) {
+ ClearCredentials();
credential_kind_ = CredentialKind::kStorageSharedKey;
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account
name");
}
+ account_key_ = account_key;
storage_shared_key_credential_ =
std::make_shared<Storage::StorageSharedKeyCredential>(account_name,
account_key);
return Status::OK();
}
Status AzureOptions::ConfigureSASCredential(const std::string& sas_token) {
- credential_kind_ = CredentialKind::kSASToken;
+ ClearCredentials();
if (account_name.empty()) {
return Status::Invalid("AzureOptions doesn't contain a valid account
name");
}
sas_token_ = sas_token;
+ credential_kind_ = CredentialKind::kSASToken;
return Status::OK();
}
Status AzureOptions::ConfigureClientSecretCredential(const std::string&
tenant_id,
const std::string&
client_id,
const std::string&
client_secret) {
+ ClearCredentials();
+ tenant_id_ = tenant_id;
+ client_id_ = client_id;
+ client_secret_ = client_secret;
credential_kind_ = CredentialKind::kClientSecret;
token_credential_ =
std::make_shared<Azure::Identity::ClientSecretCredential>(
tenant_id, client_id, client_secret);
@@ -358,6 +382,8 @@ Status AzureOptions::ConfigureClientSecretCredential(const
std::string& tenant_i
}
Status AzureOptions::ConfigureManagedIdentityCredential(const std::string&
client_id) {
+ ClearCredentials();
+ client_id_ = client_id;
credential_kind_ = CredentialKind::kManagedIdentity;
token_credential_ =
std::make_shared<Azure::Identity::ManagedIdentityCredential>(client_id);
@@ -365,18 +391,21 @@ Status
AzureOptions::ConfigureManagedIdentityCredential(const std::string& clien
}
Status AzureOptions::ConfigureCLICredential() {
+ ClearCredentials();
credential_kind_ = CredentialKind::kCLI;
token_credential_ = std::make_shared<Azure::Identity::AzureCliCredential>();
return Status::OK();
}
Status AzureOptions::ConfigureWorkloadIdentityCredential() {
+ ClearCredentials();
credential_kind_ = CredentialKind::kWorkloadIdentity;
token_credential_ =
std::make_shared<Azure::Identity::WorkloadIdentityCredential>();
return Status::OK();
}
Status AzureOptions::ConfigureEnvironmentCredential() {
+ ClearCredentials();
credential_kind_ = CredentialKind::kEnvironment;
token_credential_ =
std::make_shared<Azure::Identity::EnvironmentCredential>();
return Status::OK();
diff --git a/cpp/src/arrow/filesystem/azurefs.h
b/cpp/src/arrow/filesystem/azurefs.h
index ae374d487b..d21bcd08df 100644
--- a/cpp/src/arrow/filesystem/azurefs.h
+++ b/cpp/src/arrow/filesystem/azurefs.h
@@ -99,10 +99,6 @@ struct ARROW_EXPORT AzureOptions {
/// Default: "https"
std::string dfs_storage_scheme = "https";
- // TODO(GH-38598): Add support for more auth methods.
- // std::string connection_string;
- // std::string sas_token;
-
/// \brief Default metadata for OpenOutputStream.
///
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
@@ -126,7 +122,11 @@ struct ARROW_EXPORT AzureOptions {
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;
+ std::string account_key_;
std::string sas_token_;
+ std::string tenant_id_;
+ std::string client_id_;
+ std::string client_secret_;
mutable std::shared_ptr<Azure::Core::Credentials::TokenCredential>
token_credential_;
public:
@@ -136,6 +136,7 @@ struct ARROW_EXPORT AzureOptions {
private:
void ExtractFromUriSchemeAndHierPart(const Uri& uri, std::string* out_path);
Status ExtractFromUriQuery(const Uri& uri);
+ void ClearCredentials();
public:
/// \brief Construct a new AzureOptions from an URI.
@@ -204,6 +205,12 @@ struct ARROW_EXPORT AzureOptions {
std::string AccountBlobUrl(const std::string& account_name) const;
std::string AccountDfsUrl(const std::string& account_name) const;
+ std::string AccountKey() const { return account_key_; }
+ std::string SasToken() const { return sas_token_; }
+ std::string TenantId() const { return tenant_id_; }
+ std::string ClientId() const { return client_id_; }
+ std::string ClientSecret() const { return client_secret_; }
+
Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
MakeBlobServiceClient() const;
diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc
b/cpp/src/arrow/filesystem/azurefs_test.cc
index c3af6fb079..4cd4250555 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -493,6 +493,11 @@ TEST(AzureFileSystem, InitializeWithDefaultCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureDefaultCredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -509,6 +514,23 @@ TEST(AzureFileSystem, InitializeWithAnonymousCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureAnonymousCredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
+ EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
+}
+
+TEST(AzureFileSystem, InitializeWithAccountKeyCredential) {
+ AzureOptions options;
+ options.account_name = "dummy-account-name";
+ ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential("account_key"));
+ ASSERT_EQ(options.AccountKey(), "account_key");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -517,6 +539,11 @@ TEST(AzureFileSystem,
InitializeWithClientSecretCredential) {
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(
options.ConfigureClientSecretCredential("tenant_id", "client_id",
"client_secret"));
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "tenant_id");
+ ASSERT_EQ(options.ClientId(), "client_id");
+ ASSERT_EQ(options.ClientSecret(), "client_secret");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -524,9 +551,19 @@ TEST(AzureFileSystem,
InitializeWithManagedIdentityCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id"));
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "specific-client-id");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options));
}
@@ -534,6 +571,11 @@ TEST(AzureFileSystem, InitializeWithCLICredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureCLICredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -541,6 +583,11 @@ TEST(AzureFileSystem,
InitializeWithWorkloadIdentityCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
@@ -548,12 +595,42 @@ TEST(AzureFileSystem,
InitializeWithEnvironmentCredential) {
AzureOptions options;
options.account_name = "dummy-account-name";
ARROW_EXPECT_OK(options.ConfigureEnvironmentCredential());
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), "");
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
}
TEST(AzureFileSystem, OptionsCompare) {
- AzureOptions options;
- EXPECT_TRUE(options.Equals(options));
+ AzureOptions options0;
+ EXPECT_TRUE(options0.Equals(options0));
+
+ AzureOptions options1;
+ options1.account_name = "account_name";
+ EXPECT_FALSE(options1.Equals(options0));
+
+ AzureOptions options2;
+ options2.account_name = "account_name";
+ ASSERT_OK(options2.ConfigureAccountKeyCredential("fake_account_key"));
+ EXPECT_FALSE(options2.Equals(options1));
+
+ AzureOptions options3;
+ options3.account_name = "account_name";
+
ASSERT_OK(options3.ConfigureAccountKeyCredential("different_fake_account_key"));
+ EXPECT_FALSE(options3.Equals(options2));
+
+ AzureOptions options4;
+ options4.account_name = "account_name";
+ ASSERT_OK(options4.ConfigureSASCredential("fake_sas_token"));
+ EXPECT_FALSE(options4.Equals(options3));
+
+ AzureOptions options5;
+ options5.account_name = "account_name";
+ ASSERT_OK(options5.ConfigureClientSecretCredential("fake_tenant_id",
"fake_client_id",
+ "fake_client_secret"));
+ EXPECT_FALSE(options5.Equals(options4));
}
class TestAzureOptions : public ::testing::Test {
@@ -1679,9 +1756,14 @@ class TestAzureFileSystem : public ::testing::Test {
env->account_name(), env->account_key())));
// AzureOptions::FromUri will not cut off extra query parameters that it
consumes, so
// make sure these don't cause problems.
- ARROW_EXPECT_OK(options.ConfigureSASCredential(
- "?blob_storage_authority=dummy_value0&" + sas_token.substr(1) +
- "&credential_kind=dummy-value1"));
+ auto polluted_sas_token = "?blob_storage_authority=dummy_value0&" +
+ sas_token.substr(1) +
"&credential_kind=dummy-value1";
+ ARROW_EXPECT_OK(options.ConfigureSASCredential(polluted_sas_token));
+ ASSERT_EQ(options.AccountKey(), "");
+ ASSERT_EQ(options.SasToken(), polluted_sas_token);
+ ASSERT_EQ(options.TenantId(), "");
+ ASSERT_EQ(options.ClientId(), "");
+ ASSERT_EQ(options.ClientSecret(), "");
EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
AssertFileInfo(fs.get(), data.ObjectPath(), FileType::File);
diff --git a/python/pyarrow/_azurefs.pyx b/python/pyarrow/_azurefs.pyx
index aa6ad1d90e..deb58b0aed 100644
--- a/python/pyarrow/_azurefs.pyx
+++ b/python/pyarrow/_azurefs.pyx
@@ -28,33 +28,33 @@ cdef class AzureFileSystem(FileSystem):
Azure Blob Storage backed FileSystem implementation
This implementation supports flat namespace and hierarchical namespace
(HNS) a.k.a.
- Data Lake Gen2 storage accounts. HNS will be automatically detected and
HNS specific
- features will be used when they provide a performance advantage. Azurite
emulator is
+ Data Lake Gen2 storage accounts. HNS will be automatically detected and
HNS specific
+ features will be used when they provide a performance advantage. Azurite
emulator is
also supported. Note: `/` is the only supported delimiter.
- The storage account is considered the root of the filesystem. When
enabled, containers
- will be created or deleted during relevant directory operations.
Obviously, this also
- requires authentication with the additional permissions.
+ The storage account is considered the root of the filesystem. When
enabled, containers
+ will be created or deleted during relevant directory operations.
Obviously, this also
+ requires authentication with the additional permissions.
- By default `DefaultAzureCredential
<https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
+ By default `DefaultAzureCredential
<https://github.com/Azure/azure-sdk-for-cpp/blob/main/sdk/identity/azure-identity/README.md#defaultazurecredential>`__
is used for authentication. This means it will try several types of
authentication
- and go with the first one that works. If any authentication parameters are
provided when
+ and go with the first one that works. If any authentication parameters are
provided when
initialising the FileSystem, they will be used instead of the default
credential.
Parameters
----------
account_name : str
- Azure Blob Storage account name. This is the globally unique
identifier for the
+ Azure Blob Storage account name. This is the globally unique
identifier for the
storage account.
account_key : str, default None
- Account key of the storage account. If sas_token and account_key are
None the
+ Account key of the storage account. If sas_token and account_key are
None the
default credential will be used. The parameters account_key and
sas_token are
mutually exclusive.
blob_storage_authority : str, default None
hostname[:port] of the Blob Service. Defaults to
`.blob.core.windows.net`. Useful
for connecting to a local emulator, like Azurite.
blob_storage_scheme : str, default None
- Either `http` or `https`. Defaults to `https`. Useful for connecting
to a local
+ Either `http` or `https`. Defaults to `https`. Useful for connecting
to a local
emulator, like Azurite.
client_id : str, default None
The client ID (Application ID) for Azure Active Directory
authentication.
@@ -101,11 +101,6 @@ cdef class AzureFileSystem(FileSystem):
"""
cdef:
CAzureFileSystem* azurefs
- c_string account_key
- c_string sas_token
- c_string tenant_id
- c_string client_id
- c_string client_secret
def __init__(self, account_name, *, account_key=None,
blob_storage_authority=None,
blob_storage_scheme=None, client_id=None, client_secret=None,
@@ -133,14 +128,10 @@ cdef class AzureFileSystem(FileSystem):
raise ValueError("client_id must be specified")
if not tenant_id and not client_secret:
options.ConfigureManagedIdentityCredential(tobytes(client_id))
- self.client_id = tobytes(client_id)
elif tenant_id and client_secret:
options.ConfigureClientSecretCredential(
tobytes(tenant_id), tobytes(client_id),
tobytes(client_secret)
)
- self.tenant_id = tobytes(tenant_id)
- self.client_id = tobytes(client_id)
- self.client_secret = tobytes(client_secret)
else:
raise ValueError(
"Invalid Azure credential configuration: "
@@ -149,10 +140,8 @@ cdef class AzureFileSystem(FileSystem):
)
elif account_key:
options.ConfigureAccountKeyCredential(tobytes(account_key))
- self.account_key = tobytes(account_key)
elif sas_token:
options.ConfigureSASCredential(tobytes(sas_token))
- self.sas_token = tobytes(sas_token)
else:
options.ConfigureDefaultCredential()
@@ -176,13 +165,13 @@ cdef class AzureFileSystem(FileSystem):
return (
AzureFileSystem._reconstruct, (dict(
account_name=frombytes(opts.account_name),
- account_key=frombytes(self.account_key),
+ account_key=frombytes(opts.AccountKey()),
blob_storage_authority=frombytes(opts.blob_storage_authority),
blob_storage_scheme=frombytes(opts.blob_storage_scheme),
- client_id=frombytes(self.client_id),
- client_secret=frombytes(self.client_secret),
+ client_id=frombytes(opts.ClientId()),
+ client_secret=frombytes(opts.ClientSecret()),
dfs_storage_authority=frombytes(opts.dfs_storage_authority),
dfs_storage_scheme=frombytes(opts.dfs_storage_scheme),
- sas_token=frombytes(self.sas_token),
- tenant_id=frombytes(self.tenant_id)
+ sas_token=frombytes(opts.SasToken()),
+ tenant_id=frombytes(opts.TenantId())
),))
diff --git a/python/pyarrow/includes/libarrow_fs.pxd
b/python/pyarrow/includes/libarrow_fs.pxd
index af01c47c8c..d18dc2d2bd 100644
--- a/python/pyarrow/includes/libarrow_fs.pxd
+++ b/python/pyarrow/includes/libarrow_fs.pxd
@@ -259,6 +259,11 @@ cdef extern from "arrow/filesystem/api.h" namespace
"arrow::fs" nogil:
CStatus ConfigureClientSecretCredential(c_string tenant_id,
c_string client_id,
c_string client_secret)
+ c_string SasToken()
+ c_string AccountKey()
+ c_string TenantId()
+ c_string ClientId()
+ c_string ClientSecret()
cdef cppclass CAzureFileSystem "arrow::fs::AzureFileSystem":
@staticmethod
diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py
index 376398baa0..5bf1950c06 100644
--- a/python/pyarrow/tests/test_fs.py
+++ b/python/pyarrow/tests/test_fs.py
@@ -636,7 +636,29 @@ def test_subtree_filesystem():
' base_fs=<pyarrow._fs.LocalFileSystem')
-def test_filesystem_pickling(fs, pickle_module):
+class _PickleModuleSubTreeFileSystemWrapper:
+ def __init__(self, pickle_module):
+ self.pickle_module = pickle_module
+
+ def dumps(self, obj):
+ return self.pickle_module.dumps(SubTreeFileSystem("/", obj))
+
+ def loads(self, data):
+ return self.pickle_module.loads(data).base_fs
+
+
[email protected](params=[True, False])
+def pickle_with_and_without_subtree_filesystem(pickle_module, request):
+ # When creating a SubTreeFileSystem, the python side object for the base
+ # filesystem is lost. This makes it a pickling scenario worth testing.
+ wrap_with_subtree_filesystem = request.param
+ if wrap_with_subtree_filesystem:
+ return _PickleModuleSubTreeFileSystemWrapper(pickle_module)
+ return pickle_module
+
+
+def test_filesystem_pickling(fs, pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
if fs.type_name.split('::')[-1] == 'mock':
pytest.xfail(reason='MockFileSystem is not serializable')
@@ -646,7 +668,10 @@ def test_filesystem_pickling(fs, pickle_module):
assert restored.equals(fs)
-def test_filesystem_is_functional_after_pickling(fs, pathfn, pickle_module):
+def test_filesystem_is_functional_after_pickling(
+ fs, pathfn, pickle_with_and_without_subtree_filesystem
+):
+ pickle_module = pickle_with_and_without_subtree_filesystem
if fs.type_name.split('::')[-1] == 'mock':
pytest.xfail(reason='MockFileSystem is not serializable')
skip_fsspec_s3fs(fs)
@@ -1171,7 +1196,8 @@ def test_mockfs_mtime_roundtrip(mockfs):
@pytest.mark.gcs
-def test_gcs_options(pickle_module):
+def test_gcs_options(pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
from pyarrow.fs import GcsFileSystem
dt = datetime.now()
fs = GcsFileSystem(access_token='abc',
@@ -1209,7 +1235,8 @@ def test_gcs_options(pickle_module):
@pytest.mark.s3
-def test_s3_options(pickle_module):
+def test_s3_options(pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
from pyarrow.fs import (AwsDefaultS3RetryStrategy,
AwsStandardS3RetryStrategy, S3FileSystem,
S3RetryStrategy)
@@ -1313,7 +1340,8 @@ def test_s3_options(pickle_module):
@pytest.mark.s3
-def test_s3_proxy_options(monkeypatch, pickle_module):
+def test_s3_proxy_options(monkeypatch,
pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
from pyarrow.fs import S3FileSystem
# The following two are equivalent:
@@ -1478,7 +1506,8 @@ def test_s3fs_wrong_region():
@pytest.mark.azure
-def test_azurefs_options(pickle_module):
+def test_azurefs_options(pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
from pyarrow.fs import AzureFileSystem
fs1 = AzureFileSystem(account_name='fake-account-name')
@@ -1572,7 +1601,8 @@ def test_azurefs_options(pickle_module):
@pytest.mark.hdfs
-def test_hdfs_options(hdfs_connection, pickle_module):
+def test_hdfs_options(hdfs_connection,
pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
from pyarrow.fs import HadoopFileSystem
if not pa.have_libhdfs():
pytest.skip('Cannot locate libhdfs')
@@ -1765,7 +1795,8 @@ def test_py_filesystem_equality():
assert fs1 != object()
-def test_py_filesystem_pickling(pickle_module):
+def test_py_filesystem_pickling(pickle_with_and_without_subtree_filesystem):
+ pickle_module = pickle_with_and_without_subtree_filesystem
handler = DummyHandler()
fs = PyFileSystem(handler)