This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new d796878 Implement pre-existing session support for dynamodb catalog (#104)
d796878 is described below
commit d796878e911a70ce71bad5f65e68e1da60e9e487
Author: waifairer <[email protected]>
AuthorDate: Thu Jan 18 03:56:30 2024 -0700
Implement pre-existing session support for dynamodb catalog (#104)
---
mkdocs/docs/configuration.md | 13 +++++++++++++
mkdocs/docs/contributing.md | 4 +++-
pyiceberg/catalog/dynamodb.py | 10 +++++++++-
tests/catalog/test_dynamodb.py | 28 ++++++++++++++++++++++++++--
4 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index 12bb351..bfe1e62 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -218,6 +218,19 @@ catalog:
table-name: iceberg
```
+If you prefer to pass the credentials explicitly to the client instead of relying on environment variables,
+
+```yaml
+catalog:
+ default:
+ type: dynamodb
+ table-name: iceberg
+ aws_access_key_id: <ACCESS_KEY_ID>
+ aws_secret_access_key: <SECRET_ACCESS_KEY>
+ aws_session_token: <SESSION_TOKEN>
+ region_name: <REGION_NAME>
+```
+
# Concurrency
PyIceberg uses multiple threads to parallelize operations. The number of workers can be configured by supplying a `max-workers` entry in the configuration file, or by setting the `PYICEBERG_MAX_WORKERS` environment variable. The default value depends on the system hardware and Python version. See [the Python documentation](https://docs.python.org/3/library/concurrent.futures.html#threadpoolexecutor) for more details.
diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md
index 3973b76..8ec6dcb 100644
--- a/mkdocs/docs/contributing.md
+++ b/mkdocs/docs/contributing.md
@@ -30,10 +30,12 @@ For the development, Poetry is used for packing and dependency management. You c
pip install poetry
```
-If you have an older version of pip and virtualenv you need to update these:
+Make sure you're using an up-to-date environment from venv
```bash
pip install --upgrade virtualenv pip
+python -m venv ./venv
+source ./venv/bin/activate
```
To get started, you can run `make install`, which installs Poetry and all the dependencies of the Iceberg library. This also installs the development dependencies. If you don't want to install the development dependencies, you need to install using `poetry install --no-dev`.
diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py
index 3eee95d..6c3f931 100644
--- a/pyiceberg/catalog/dynamodb.py
+++ b/pyiceberg/catalog/dynamodb.py
@@ -80,7 +80,15 @@ ITEM = "Item"
class DynamoDbCatalog(Catalog):
def __init__(self, name: str, **properties: str):
super().__init__(name, **properties)
- self.dynamodb = boto3.client(DYNAMODB_CLIENT)
+ session = boto3.Session(
+ profile_name=properties.get("profile_name"),
+ region_name=properties.get("region_name"),
+ botocore_session=properties.get("botocore_session"),
+ aws_access_key_id=properties.get("aws_access_key_id"),
+ aws_secret_access_key=properties.get("aws_secret_access_key"),
+ aws_session_token=properties.get("aws_session_token"),
+ )
+ self.dynamodb = session.client(DYNAMODB_CLIENT)
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)
self._ensure_catalog_table_exists_or_create()
diff --git a/tests/catalog/test_dynamodb.py b/tests/catalog/test_dynamodb.py
index 917a5d2..fb4eaaa 100644
--- a/tests/catalog/test_dynamodb.py
+++ b/tests/catalog/test_dynamodb.py
@@ -14,11 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from typing import List
+from typing import Any, Dict, List
import boto3
import pytest
from moto import mock_dynamodb
+from unittest import mock
from pyiceberg.catalog import METADATA_LOCATION, TABLE_TYPE
from pyiceberg.catalog.dynamodb import (
@@ -26,6 +27,7 @@ from pyiceberg.catalog.dynamodb import (
DYNAMODB_COL_IDENTIFIER,
DYNAMODB_COL_NAMESPACE,
DYNAMODB_TABLE_NAME_DEFAULT,
+ ACTIVE,
DynamoDbCatalog,
_add_property_prefix,
)
@@ -47,12 +49,13 @@ def test_create_dynamodb_catalog_with_table_name(_dynamodb, _bucket_initialize:
DynamoDbCatalog("test_ddb_catalog")
response = _dynamodb.describe_table(TableName=DYNAMODB_TABLE_NAME_DEFAULT)
assert response["Table"]["TableName"] == DYNAMODB_TABLE_NAME_DEFAULT
+ assert response["Table"]["TableStatus"] == ACTIVE
custom_table_name = "custom_table_name"
DynamoDbCatalog("test_ddb_catalog", **{"table-name": custom_table_name})
response = _dynamodb.describe_table(TableName=custom_table_name)
assert response["Table"]["TableName"] == custom_table_name
-
+ assert response["Table"]["TableStatus"] == ACTIVE
@mock_dynamodb
def test_create_table_with_database_location(
@@ -506,3 +509,24 @@ def test_update_namespace_properties_overlap_update_removal(_bucket_initialize:
test_catalog.update_namespace_properties(database_name, removals, updates)
# should not modify the properties
assert test_catalog.load_namespace_properties(database_name) == test_properties
+
+def test_passing_provided_profile() -> None:
+ catalog_name = "test_ddb_catalog"
+ session_props = {
+ "aws_access_key_id": "abc",
+ "aws_secret_access_key": "def",
+ "aws_session_token": "ghi",
+ "region_name": "eu-central-1",
+ "botocore_session": None,
+ "profile_name": None
+ }
+ props = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"}
+ props.update(session_props)
+ with mock.patch('boto3.Session', return_value=mock.Mock()) as mock_session:
+ mock_client = mock.Mock()
+ mock_session.return_value.client.return_value = mock_client
+ mock_client.describe_table.return_value = {'Table': {'TableStatus': 'ACTIVE'}}
+ test_catalog = DynamoDbCatalog(catalog_name, **props)
+ assert test_catalog.dynamodb is mock_client
+ mock_session.assert_called_with(**session_props)
+ assert test_catalog.dynamodb is mock_session().client()