This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new c7b55b12 Rename `gcs.endpoint` to `gcs.service.host` (#1007)
c7b55b12 is described below

commit c7b55b12c5d2e3a9ae90ac21016299c7dff46fff
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Nov 6 18:44:21 2024 +0100

    Rename `gcs.endpoint` to `gcs.service.host` (#1007)
    
    * Rename `gcs.endpoint` to `gcs.service.host`
    
    To bring the property name in line with the Java implementation:
    
    https://github.com/apache/iceberg/blob/6ee6d1327d3811dbd5795c4e87efdc41b7a58eaa/gcp/src/main/java/org/apache/iceberg/gcp/GCPProperties.java#L32
    
    * Import
    
    Co-authored-by: Andre Luis Anastacio <[email protected]>
    
    * Use `deprecation_message` instead
    
    Co-authored-by: Andre Luis Anastacio <[email protected]>
    
    * Use `deprecation_message` instead
    
    Co-authored-by: Andre Luis Anastacio <[email protected]>
    
    * Fix message
    
    * Update pyiceberg/io/fsspec.py
    
    Co-authored-by: Kevin Liu <[email protected]>
    
    * Update pyiceberg/io/fsspec.py
    
    Co-authored-by: Kevin Liu <[email protected]>
    
    * Update pyiceberg/io/pyarrow.py
    
    Co-authored-by: Kevin Liu <[email protected]>
    
    ---------
    
    Co-authored-by: Andre Luis Anastacio <[email protected]>
    Co-authored-by: Kevin Liu <[email protected]>
---
 mkdocs/docs/configuration.md |  2 +-
 pyiceberg/io/__init__.py     |  1 +
 pyiceberg/io/fsspec.py       |  9 ++++++++-
 pyiceberg/io/pyarrow.py      | 11 +++++++++--
 tests/conftest.py            |  6 +++---
 5 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index ba77867b..8302f355 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -161,7 +161,7 @@ For the FileIO there are several configuration options 
available:
 | gcs.cache-timeout           | 60                  | Configure the cache 
expiration time in seconds for object metadata cache                            
                                                                                
                                                                |
 | gcs.requester-pays          | False               | Configure whether to use 
requester-pays requests                                                         
                                                                                
                                                           |
 | gcs.session-kwargs          | {}                  | Configure a dict of 
parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy 
settings.                                                                       
                                                                |
-| gcs.endpoint                | <http://0.0.0.0:4443> | Configure an 
alternative endpoint for the GCS FileIO to access (format protocol://host:port) 
If not given, defaults to the value of environment variable 
"STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard 
Google endpoint. |
+| gcs.service.host            | <http://0.0.0.0:4443> | Configure an 
alternative endpoint for the GCS FileIO to access (format protocol://host:port) 
If not given, defaults to the value of environment variable 
"STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard 
Google endpoint. |
 | gcs.default-location        | US                  | Configure the default 
location where buckets are created, like 'US' or 'EUROPE-WEST3'.                
                                                                                
                                                              |
 | gcs.version-aware           | False               | Configure whether to 
support object versioning on the GCS bucket.                                    
                                                                                
                                                               |
 
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 23a2cf35..5aded82d 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -94,6 +94,7 @@ GCS_CACHE_TIMEOUT = "gcs.cache-timeout"
 GCS_REQUESTER_PAYS = "gcs.requester-pays"
 GCS_SESSION_KWARGS = "gcs.session-kwargs"
 GCS_ENDPOINT = "gcs.endpoint"
+GCS_SERVICE_HOST = "gcs.service.host"
 GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
 GCS_VERSION_AWARE = "gcs.version-aware"
 PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index 1632c4bb..434ae67d 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -64,6 +64,7 @@ from pyiceberg.io import (
     GCS_ENDPOINT,
     GCS_PROJECT_ID,
     GCS_REQUESTER_PAYS,
+    GCS_SERVICE_HOST,
     GCS_SESSION_KWARGS,
     GCS_TOKEN,
     GCS_VERSION_AWARE,
@@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem:
     # https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem
     from gcsfs import GCSFileSystem
 
+    if properties.get(GCS_ENDPOINT):
+        deprecation_message(
+            deprecated_in="0.8.0",
+            removed_in="0.9.0",
+            help_message=f"The property {GCS_ENDPOINT} is deprecated, please 
use {GCS_SERVICE_HOST} instead",
+        )
     return GCSFileSystem(
         project=properties.get(GCS_PROJECT_ID),
         access=properties.get(GCS_ACCESS, "full_control"),
@@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem:
         cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
         requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
         session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
-        endpoint_url=properties.get(GCS_ENDPOINT),
+        endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, 
GCS_ENDPOINT),
         default_location=properties.get(GCS_DEFAULT_LOCATION),
         version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
     )
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index a053b83a..52a2756a 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -91,6 +91,7 @@ from pyiceberg.io import (
     AWS_SESSION_TOKEN,
     GCS_DEFAULT_LOCATION,
     GCS_ENDPOINT,
+    GCS_SERVICE_HOST,
     GCS_TOKEN,
     GCS_TOKEN_EXPIRES_AT_MS,
     HDFS_HOST,
@@ -163,7 +164,7 @@ from pyiceberg.types import (
 from pyiceberg.utils.concurrent import ExecutorFactory
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
-from pyiceberg.utils.deprecated import deprecated
+from pyiceberg.utils.deprecated import deprecated, deprecation_message
 from pyiceberg.utils.properties import get_first_property_value, 
property_as_bool, property_as_int
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, 
truncate_upper_bound_text_string
@@ -400,7 +401,13 @@ class PyArrowFileIO(FileIO):
                 gcs_kwargs["credential_token_expiration"] = 
millis_to_datetime(int(expiration))
             if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION):
                 gcs_kwargs["default_bucket_location"] = bucket_location
-            if endpoint := self.properties.get(GCS_ENDPOINT):
+            if endpoint := get_first_property_value(self.properties, 
GCS_SERVICE_HOST, GCS_ENDPOINT):
+                if self.properties.get(GCS_ENDPOINT):
+                    deprecation_message(
+                        deprecated_in="0.8.0",
+                        removed_in="0.9.0",
+                        help_message=f"The property {GCS_ENDPOINT} is 
deprecated, please use {GCS_SERVICE_HOST} instead",
+                    )
                 url_parts = urlparse(endpoint)
                 gcs_kwargs["scheme"] = url_parts.scheme
                 gcs_kwargs["endpoint_override"] = url_parts.netloc
diff --git a/tests/conftest.py b/tests/conftest.py
index e7e73375..9160a143 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -53,8 +53,8 @@ from pyiceberg.catalog import Catalog, load_catalog
 from pyiceberg.catalog.noop import NoopCatalog
 from pyiceberg.expressions import BoundReference
 from pyiceberg.io import (
-    GCS_ENDPOINT,
     GCS_PROJECT_ID,
+    GCS_SERVICE_HOST,
     GCS_TOKEN,
     GCS_TOKEN_EXPIRES_AT_MS,
     fsspec,
@@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> 
FsspecFileIO:
 @pytest.fixture
 def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> FsspecFileIO:
     properties = {
-        GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
+        GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
         GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
         GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
     }
@@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> 
"PyArrowFileIO":
     from pyiceberg.io.pyarrow import PyArrowFileIO
 
     properties = {
-        GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"),
+        GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"),
         GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"),
         GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"),
         GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 
1000,

Reply via email to