This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new c7b55b12 Rename `gcs.endpoint` to `gcs.service.host` (#1007)
c7b55b12 is described below
commit c7b55b12c5d2e3a9ae90ac21016299c7dff46fff
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Nov 6 18:44:21 2024 +0100
Rename `gcs.endpoint` to `gcs.service.host` (#1007)
* Rename `gcs.endpoint` to `gcs.service.host`
To make it in line with Java:
https://github.com/apache/iceberg/blob/6ee6d1327d3811dbd5795c4e87efdc41b7a58eaa/gcp/src/main/java/org/apache/iceberg/gcp/GCPProperties.java#L32
* Import
Co-authored-by: Andre Luis Anastacio <[email protected]>
* Use `deprecation_message` instead
Co-authored-by: Andre Luis Anastacio <[email protected]>
* Use `deprecation_message` instead
Co-authored-by: Andre Luis Anastacio <[email protected]>
* Fix message
* Update pyiceberg/io/fsspec.py
Co-authored-by: Kevin Liu <[email protected]>
* Update pyiceberg/io/fsspec.py
Co-authored-by: Kevin Liu <[email protected]>
* Update pyiceberg/io/pyarrow.py
Co-authored-by: Kevin Liu <[email protected]>
---------
Co-authored-by: Andre Luis Anastacio <[email protected]>
Co-authored-by: Kevin Liu <[email protected]>
---
mkdocs/docs/configuration.md | 2 +-
pyiceberg/io/__init__.py | 1 +
pyiceberg/io/fsspec.py | 9 ++++++++-
pyiceberg/io/pyarrow.py | 11 +++++++++--
tests/conftest.py | 6 +++---
5 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index ba77867b..8302f355 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -161,7 +161,7 @@ For the FileIO there are several configuration options available:
| gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache |
| gcs.requester-pays | False | Configure whether to use requester-pays requests |
| gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. |
-| gcs.endpoint | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
+| gcs.service.host | <http://0.0.0.0:4443> | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. |
| gcs.default-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. |
| gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. |
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 23a2cf35..5aded82d 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -94,6 +94,7 @@ GCS_CACHE_TIMEOUT = "gcs.cache-timeout"
GCS_REQUESTER_PAYS = "gcs.requester-pays"
GCS_SESSION_KWARGS = "gcs.session-kwargs"
GCS_ENDPOINT = "gcs.endpoint"
+GCS_SERVICE_HOST = "gcs.service.host"
GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index 1632c4bb..434ae67d 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -64,6 +64,7 @@ from pyiceberg.io import (
GCS_ENDPOINT,
GCS_PROJECT_ID,
GCS_REQUESTER_PAYS,
+ GCS_SERVICE_HOST,
GCS_SESSION_KWARGS,
GCS_TOKEN,
GCS_VERSION_AWARE,
@@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem:
# https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
from gcsfs import GCSFileSystem
+ if properties.get(GCS_ENDPOINT):
+ deprecation_message(
+ deprecated_in="0.8.0",
+ removed_in="0.9.0",
+ help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
+ )
return GCSFileSystem(
project=properties.get(GCS_PROJECT_ID),
access=properties.get(GCS_ACCESS, "full_control"),
@@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
- endpoint_url=properties.get(GCS_ENDPOINT),
+ endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT),
default_location=properties.get(GCS_DEFAULT_LOCATION),
version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index a053b83a..52a2756a 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -91,6 +91,7 @@ from pyiceberg.io import (
AWS_SESSION_TOKEN,
GCS_DEFAULT_LOCATION,
GCS_ENDPOINT,
+ GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
HDFS_HOST,
@@ -163,7 +164,7 @@ from pyiceberg.types import (
from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
-from pyiceberg.utils.deprecated import deprecated
+from pyiceberg.utils.deprecated import deprecated, deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -400,7 +401,13 @@ class PyArrowFileIO(FileIO):
gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration))
if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
gcs_kwargs["default_bucket_location"] = bucket_location
- if endpoint := self.properties.get(GCS_ENDPOINT):
+ if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT):
+ if self.properties.get(GCS_ENDPOINT):
+ deprecation_message(
+ deprecated_in="0.8.0",
+ removed_in="0.9.0",
+ help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead",
+ )
url_parts = urlparse(endpoint)
gcs_kwargs["scheme"] = url_parts.scheme
gcs_kwargs["endpoint_override"] = url_parts.netloc
diff --git a/tests/conftest.py b/tests/conftest.py
index e7e73375..9160a143 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -53,8 +53,8 @@ from pyiceberg.catalog import Catalog, load_catalog
from pyiceberg.catalog.noop import NoopCatalog
from pyiceberg.expressions import BoundReference
from pyiceberg.io import (
- GCS_ENDPOINT,
GCS_PROJECT_ID,
+ GCS_SERVICE_HOST,
GCS_TOKEN,
GCS_TOKEN_EXPIRES_AT_MS,
fsspec,
@@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO:
@pytest.fixture
def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
properties = {
- GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
+ GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
}
@@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO":
from pyiceberg.io.pyarrow import PyArrowFileIO
properties = {
- GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
+ GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,