This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
     new 77078566bd [Python] fix oss file_io client_kwargs for pyarrow 16.0 (#6255)
77078566bd is described below

commit 77078566bd9212dba54e348a27aa68a74c93fc49
Author: ChengHui Chen <27797326+chenghuic...@users.noreply.github.com>
AuthorDate: Mon Sep 15 15:59:31 2025 +0800

    [Python] fix oss file_io client_kwargs for pyarrow 16.0 (#6255)
---
 paimon-python/pypaimon/common/config.py  |  1 +
 paimon-python/pypaimon/common/file_io.py | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/paimon-python/pypaimon/common/config.py b/paimon-python/pypaimon/common/config.py
index 0478c207bb..b3c9a673b3 100644
--- a/paimon-python/pypaimon/common/config.py
+++ b/paimon-python/pypaimon/common/config.py
@@ -21,6 +21,7 @@ class OssOptions:
     OSS_SECURITY_TOKEN = "fs.oss.securityToken"
     OSS_ENDPOINT = "fs.oss.endpoint"
     OSS_REGION = "fs.oss.region"
+    OSS_BUCKET = "fs.oss.bucket"
 
 
 class S3Options:
diff --git a/paimon-python/pypaimon/common/file_io.py b/paimon-python/pypaimon/common/file_io.py
index 3ec930b24b..809d65338c 100644
--- a/paimon-python/pypaimon/common/file_io.py
+++ b/paimon-python/pypaimon/common/file_io.py
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional
 from urllib.parse import splitport, urlparse
 
 import pyarrow
+from packaging.version import parse
 from pyarrow._fs import FileSystem
 
 from pypaimon.common.config import OssOptions, S3Options
@@ -57,15 +58,22 @@ class FileIO:
     def _initialize_oss_fs(self) -> FileSystem:
         from pyarrow.fs import S3FileSystem
 
-        bucket_name = self.properties.get("prefix")
+
         client_kwargs = {
-            "endpoint_override": bucket_name + "." + self.properties.get(OssOptions.OSS_ENDPOINT),
             "access_key": self.properties.get(OssOptions.OSS_ACCESS_KEY_ID),
             "secret_key": self.properties.get(OssOptions.OSS_ACCESS_KEY_SECRET),
             "session_token": self.properties.get(OssOptions.OSS_SECURITY_TOKEN),
             "region": self.properties.get(OssOptions.OSS_REGION),
         }
 
+        # Based on https://github.com/apache/arrow/issues/40506
+        if parse(pyarrow.__version__) >= parse("7.0.0"):
+            client_kwargs['force_virtual_addressing'] = True
+            client_kwargs['endpoint_override'] = self.properties.get(OssOptions.OSS_ENDPOINT)
+        else:
+            client_kwargs['endpoint_override'] = (self.properties.get(OssOptions.OSS_BUCKET) + "." +
+                                                  self.properties.get(OssOptions.OSS_ENDPOINT))
+
         return S3FileSystem(**client_kwargs)
 
     def _initialize_s3_fs(self) -> FileSystem: