This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e7a4c9dad Python: Add S3 proxies to PyIceberg Catalog FileIO (#7958)
6e7a4c9dad is described below

commit 6e7a4c9dadb841cfd944bedddb14d328ac93ec44
Author: Sung Yun <[email protected]>
AuthorDate: Thu Jul 6 03:35:33 2023 -0400

    Python: Add S3 proxies to PyIceberg Catalog FileIO (#7958)
    
    * proxies
    
    * proxies config
    
    * typo
    
    * one proxy-uri config
    
    * isort
    
    * lint
    
    * proxy uri for both http and https on s3fs
    
    * lint
    
    * rename property
    
    * s3.proxy-uri
    
    * lint
---
 python/mkdocs/docs/configuration.md | 15 ++++++++-------
 python/pyiceberg/io/__init__.py     |  1 +
 python/pyiceberg/io/fsspec.py       |  4 ++++
 python/pyiceberg/io/pyarrow.py      |  5 +++++
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/python/mkdocs/docs/configuration.md 
b/python/mkdocs/docs/configuration.md
index 65808581f8..f461f249c1 100644
--- a/python/mkdocs/docs/configuration.md
+++ b/python/mkdocs/docs/configuration.md
@@ -60,13 +60,14 @@ For the FileIO there are several configuration options 
available:
 
 ### S3
 
-| Key                  | Example             | Description                     
                                                                                
                                                                                
                                                          |
-| -------------------- | ------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 |
-| s3.endpoint          | https://10.0.19.25/ | Configure an alternative 
endpoint of the S3 service for the FileIO to access. This could be used to use 
S3FileIO with any s3-compatible object storage service that has a different 
endpoint, or access a private S3 endpoint in a virtual private cloud. |
-| s3.access-key-id     | admin               | Configure the static secret 
access key used to access the FileIO.                                           
                                                                                
                                                              |
-| s3.secret-access-key | password            | Configure the static session 
token used to access the FileIO.                                                
                                                                                
                                                             |
-| s3.signer            | bearer              | Configure the signature version 
of the FileIO.                                                                  
                                                                                
                                                          |
-| s3.region            | us-west-2           | Sets the region of the bucket   
                                                                                
                                                                                
                                                          |
+| Key                  | Example                  | Description                
                                                                                
                                                                                
                                                               |
+| -------------------- | ------------------------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 |
+| s3.endpoint          | https://10.0.19.25/      | Configure an alternative 
endpoint of the S3 service for the FileIO to access. This could be used to use 
S3FileIO with any s3-compatible object storage service that has a different 
endpoint, or access a private S3 endpoint in a virtual private cloud. |
+| s3.access-key-id     | admin                    | Configure the static 
access key ID used to access the FileIO.                                        
                                                                                
                                                                     |
+| s3.secret-access-key | password                 | Configure the static 
secret access key used to access the FileIO.                                    
                                                                                
                                                                     |
+| s3.signer            | bearer                   | Configure the signature 
version of the FileIO.                                                          
                                                                                
                                                                  |
+| s3.region            | us-west-2                | Sets the region of the 
bucket                                                                          
                                                                                
                                                                   |
+| s3.proxy-uri         | http://my.proxy.com:8080 | Configure the proxy server 
to be used by the FileIO.                                                       
                                                                                
                                                               |
 
 ### Azure Data lake
 
diff --git a/python/pyiceberg/io/__init__.py b/python/pyiceberg/io/__init__.py
index 2c82d890e4..c477c1ac4c 100644
--- a/python/pyiceberg/io/__init__.py
+++ b/python/pyiceberg/io/__init__.py
@@ -50,6 +50,7 @@ S3_ACCESS_KEY_ID = "s3.access-key-id"
 S3_SECRET_ACCESS_KEY = "s3.secret-access-key"
 S3_SESSION_TOKEN = "s3.session-token"
 S3_REGION = "s3.region"
+S3_PROXY_URI = "s3.proxy-uri"
 
 
 @runtime_checkable
diff --git a/python/pyiceberg/io/fsspec.py b/python/pyiceberg/io/fsspec.py
index c4484130ed..65472904d0 100644
--- a/python/pyiceberg/io/fsspec.py
+++ b/python/pyiceberg/io/fsspec.py
@@ -39,6 +39,7 @@ from pyiceberg.exceptions import SignError
 from pyiceberg.io import (
     S3_ACCESS_KEY_ID,
     S3_ENDPOINT,
+    S3_PROXY_URI,
     S3_REGION,
     S3_SECRET_ACCESS_KEY,
     S3_SESSION_TOKEN,
@@ -112,6 +113,9 @@ def _s3(properties: Properties) -> AbstractFileSystem:
         else:
             raise ValueError(f"Signer not available: {signer}")
 
+    if proxy_uri := properties.get(S3_PROXY_URI):
+        config_kwargs["proxies"] = {"http": proxy_uri, "https": proxy_uri}
+
     fs = S3FileSystem(client_kwargs=client_kwargs, config_kwargs=config_kwargs)
 
     for event_name, event_function in register_events.items():
diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
index 89ddf805eb..3a22f24b0b 100644
--- a/python/pyiceberg/io/pyarrow.py
+++ b/python/pyiceberg/io/pyarrow.py
@@ -80,6 +80,7 @@ from pyiceberg.expressions.visitors import visit as 
boolean_expression_visit
 from pyiceberg.io import (
     S3_ACCESS_KEY_ID,
     S3_ENDPOINT,
+    S3_PROXY_URI,
     S3_REGION,
     S3_SECRET_ACCESS_KEY,
     S3_SESSION_TOKEN,
@@ -294,6 +295,10 @@ class PyArrowFileIO(FileIO):
                 "session_token": self.properties.get(S3_SESSION_TOKEN),
                 "region": self.properties.get(S3_REGION),
             }
+
+            if proxy_uri := self.properties.get(S3_PROXY_URI):
+                client_kwargs["proxy_options"] = proxy_uri
+
             return S3FileSystem(**client_kwargs)
         elif scheme == "file":
             return LocalFileSystem()

Reply via email to