This is an automated email from the ASF dual-hosted git repository.
honahx pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new aa5a1366 [FEAT]register table using iceberg metadata file via pyiceberg (#711)
aa5a1366 is described below
commit aa5a1366ec5ba4ef27cf2547cb90b6cc1dddf4df
Author: Mehul Batra <[email protected]>
AuthorDate: Thu May 23 02:54:13 2024 +0530
[FEAT]register table using iceberg metadata file via pyiceberg (#711)
---
pyiceberg/catalog/glue.py | 9 ++++++++-
tests/catalog/integration_test_glue.py | 16 ++++++++++++++++
tests/catalog/test_glue.py | 14 ++++++++++++++
3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
index 275cda7e..8819c2e2 100644
--- a/pyiceberg/catalog/glue.py
+++ b/pyiceberg/catalog/glue.py
@@ -417,7 +417,14 @@ class GlueCatalog(MetastoreCatalog):
Raises:
TableAlreadyExistsError: If the table already exists
"""
- raise NotImplementedError
+ database_name, table_name = self.identifier_to_database_and_table(identifier)
+ properties = EMPTY_DICT
+ io = self._load_file_io(location=metadata_location)
+ file = io.new_input(metadata_location)
+ metadata = FromInputFile.table_metadata(file)
+ table_input = _construct_table_input(table_name, metadata_location, properties, metadata)
+ self._create_glue_table(database_name=database_name, table_name=table_name, table_input=table_input)
+ return self.load_table(identifier=identifier)
def _commit_table(self, table_request: CommitTableRequest) -> CommitTableResponse:
"""Update the table.
diff --git a/tests/catalog/integration_test_glue.py b/tests/catalog/integration_test_glue.py
index 5b4aa587..ee437790 100644
--- a/tests/catalog/integration_test_glue.py
+++ b/tests/catalog/integration_test_glue.py
@@ -570,3 +570,19 @@ def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, table_
test_catalog.create_namespace(database_name)
test_catalog.create_table((database_name, table_name), table_schema_nested)
assert test_catalog.table_exists((database_name, table_name)) is True
+
+
+def test_register_table_with_given_location(
+ test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str
+) -> None:
+ identifier = (database_name, table_name)
+ new_identifier = (database_name, f"new_{table_name}")
+ test_catalog.create_namespace(database_name)
+ tbl = test_catalog.create_table(identifier, table_schema_nested)
+ location = tbl.metadata_location
+ test_catalog.drop_table(identifier) # drops the table but keeps the metadata file
+ assert not test_catalog.table_exists(identifier)
+ table = test_catalog.register_table(new_identifier, location)
+ assert table.identifier == (CATALOG_NAME,) + new_identifier
+ assert table.metadata_location == location
+ assert test_catalog.table_exists(new_identifier)
diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py
index 5b67b92c..1aea46d6 100644
--- a/tests/catalog/test_glue.py
+++ b/tests/catalog/test_glue.py
@@ -848,3 +848,17 @@ def test_table_exists(
assert test_catalog.table_exists(identifier) is True
# Act and Assert for a non-existing table
assert test_catalog.table_exists(('non', 'exist')) is False
+
+
+@mock_aws
+def test_register_table_with_given_location(
+ _bucket_initialize: None, moto_endpoint_url: str, metadata_location: str, database_name: str, table_name: str
+) -> None:
+ catalog_name = "glue"
+ identifier = (database_name, table_name)
+ location = metadata_location
+ test_catalog = GlueCatalog(catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}"})
+ test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"})
+ table = test_catalog.register_table(identifier, location)
+ assert table.identifier == (catalog_name,) + identifier
+ assert test_catalog.table_exists(identifier) is True