This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new d796878  Implement pre-existing session support for dynamodb catalog 
(#104)
d796878 is described below

commit d796878e911a70ce71bad5f65e68e1da60e9e487
Author: waifairer <[email protected]>
AuthorDate: Thu Jan 18 03:56:30 2024 -0700

    Implement pre-existing session support for dynamodb catalog (#104)
---
 mkdocs/docs/configuration.md   | 13 +++++++++++++
 mkdocs/docs/contributing.md    |  4 +++-
 pyiceberg/catalog/dynamodb.py  | 10 +++++++++-
 tests/catalog/test_dynamodb.py | 28 ++++++++++++++++++++++++++--
 4 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index 12bb351..bfe1e62 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -218,6 +218,19 @@ catalog:
     table-name: iceberg
 ```
 
+If you prefer to pass the credentials explicitly to the client instead of 
relying on environment variables, set them in the catalog configuration:
+
+```yaml
+catalog:
+  default:
+    type: dynamodb
+    table-name: iceberg
+    aws_access_key_id: <ACCESS_KEY_ID>
+    aws_secret_access_key: <SECRET_ACCESS_KEY>
+    aws_session_token: <SESSION_TOKEN>
+    region_name: <REGION_NAME>
+```
+
 # Concurrency
 
 PyIceberg uses multiple threads to parallelize operations. The number of 
workers can be configured by supplying a `max-workers` entry in the 
configuration file, or by setting the `PYICEBERG_MAX_WORKERS` environment 
variable. The default value depends on the system hardware and Python version. 
See [the Python 
documentation](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor)
 for more details.
diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md
index 3973b76..8ec6dcb 100644
--- a/mkdocs/docs/contributing.md
+++ b/mkdocs/docs/contributing.md
@@ -30,10 +30,12 @@ For the development, Poetry is used for packing and 
dependency management. You c
 pip install poetry
 ```
 
-If you have an older version of pip and virtualenv you need to update these:
+Make sure you're using an up-to-date virtual environment created with venv:
 
 ```bash
 pip install --upgrade virtualenv pip
+python -m venv ./venv
+source ./venv/bin/activate
 ```
 
 To get started, you can run `make install`, which installs Poetry and all the 
dependencies of the Iceberg library. This also installs the development 
dependencies. If you don't want to install the development dependencies, you 
need to install using `poetry install --no-dev`.
diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py
index 3eee95d..6c3f931 100644
--- a/pyiceberg/catalog/dynamodb.py
+++ b/pyiceberg/catalog/dynamodb.py
@@ -80,7 +80,15 @@ ITEM = "Item"
 class DynamoDbCatalog(Catalog):
     def __init__(self, name: str, **properties: str):
         super().__init__(name, **properties)
-        self.dynamodb = boto3.client(DYNAMODB_CLIENT)
+        session = boto3.Session(
+            profile_name=properties.get("profile_name"),
+            region_name=properties.get("region_name"),
+            botocore_session=properties.get("botocore_session"),
+            aws_access_key_id=properties.get("aws_access_key_id"),
+            aws_secret_access_key=properties.get("aws_secret_access_key"),
+            aws_session_token=properties.get("aws_session_token"),
+        )
+        self.dynamodb = session.client(DYNAMODB_CLIENT)
         self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, 
DYNAMODB_TABLE_NAME_DEFAULT)
         self._ensure_catalog_table_exists_or_create()
 
diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py
index 917a5d2..fb4eaaa 100644
--- a/tests/catalog/test_dynamodb.py
+++ b/tests/catalog/test_dynamodb.py
@@ -14,11 +14,12 @@
 #  KIND, either express or implied.  See the License for the
 #  specific language governing permissions and limitations
 #  under the License.
-from typing import List
+from typing import Any, Dict, List
 
 import boto3
 import pytest
 from moto import mock_dynamodb
+from unittest import mock
 
 from pyiceberg.catalog import METADATA_LOCATION, TABLE_TYPE
 from pyiceberg.catalog.dynamodb import (
@@ -26,6 +27,7 @@ from pyiceberg.catalog.dynamodb import (
     DYNAMODB_COL_IDENTIFIER,
     DYNAMODB_COL_NAMESPACE,
     DYNAMODB_TABLE_NAME_DEFAULT,
+    ACTIVE,
     DynamoDbCatalog,
     _add_property_prefix,
 )
@@ -47,12 +49,13 @@ def test_create_dynamodb_catalog_with_table_name(_dynamodb, 
_bucket_initialize:
     DynamoDbCatalog("test_ddb_catalog")
     response = _dynamodb.describe_table(TableName=DYNAMODB_TABLE_NAME_DEFAULT)
     assert response["Table"]["TableName"] == DYNAMODB_TABLE_NAME_DEFAULT
+    assert response["Table"]["TableStatus"] == ACTIVE
 
     custom_table_name = "custom_table_name"
     DynamoDbCatalog("test_ddb_catalog", **{"table-name": custom_table_name})
     response = _dynamodb.describe_table(TableName=custom_table_name)
     assert response["Table"]["TableName"] == custom_table_name
-
+    assert response["Table"]["TableStatus"] == ACTIVE
 
 @mock_dynamodb
 def test_create_table_with_database_location(
@@ -506,3 +509,24 @@ def 
test_update_namespace_properties_overlap_update_removal(_bucket_initialize:
         test_catalog.update_namespace_properties(database_name, removals, 
updates)
     # should not modify the properties
     assert test_catalog.load_namespace_properties(database_name) == 
test_properties
+
+def test_passing_provided_profile() -> None:
+    catalog_name = "test_ddb_catalog"
+    session_props = {
+        "aws_access_key_id": "abc",
+        "aws_secret_access_key": "def",
+        "aws_session_token": "ghi",
+        "region_name": "eu-central-1",
+        "botocore_session": None,
+        "profile_name": None
+    }
+    props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"}
+    props.update(session_props)
+    with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session:
+        mock_client = mock.Mock()
+        mock_session.return_value.client.return_value = mock_client
+        mock_client.describe_table.return_value = {'Table': {'TableStatus': 
'ACTIVE'}}
+        test_catalog = DynamoDbCatalog(catalog_name, **props)
+        assert test_catalog.dynamodb is mock_client
+        mock_session.assert_called_with(**session_props)
+        assert test_catalog.dynamodb is mock_session().client()

Reply via email to