This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new eeb0253373 Python: Add catalog name to identifiers (#7946)
eeb0253373 is described below

commit eeb0253373322bf2e8eb195955e21ff9904b8e1b
Author: Jonas(Rushan) Jiang <[email protected]>
AuthorDate: Thu Jul 6 00:34:23 2023 -0700

    Python: Add catalog name to identifiers (#7946)
    
    * add catalog name to the identifier of table returned by glue catalog
    
    * add catalog name to the identifier of table returned by hive catalog
---
 python/pyiceberg/catalog/glue.py              |  2 +-
 python/pyiceberg/catalog/hive.py              |  2 +-
 python/tests/catalog/integration_test_glue.py | 15 +++++----
 python/tests/catalog/test_glue.py             | 47 ++++++++++++++++++---------
 python/tests/catalog/test_hive.py             |  2 +-
 5 files changed, 42 insertions(+), 26 deletions(-)

diff --git a/python/pyiceberg/catalog/glue.py b/python/pyiceberg/catalog/glue.py
index 7e06f2e47f..1d5160dbaa 100644
--- a/python/pyiceberg/catalog/glue.py
+++ b/python/pyiceberg/catalog/glue.py
@@ -166,7 +166,7 @@ class GlueCatalog(Catalog):
         file = io.new_input(metadata_location)
         metadata = FromInputFile.table_metadata(file)
         return Table(
-            identifier=(glue_table[PROP_GLUE_TABLE_DATABASE_NAME], glue_table[PROP_GLUE_TABLE_NAME]),
+            identifier=(self.name, glue_table[PROP_GLUE_TABLE_DATABASE_NAME], glue_table[PROP_GLUE_TABLE_NAME]),
             metadata=metadata,
             metadata_location=metadata_location,
             io=self._load_file_io(metadata.properties, metadata_location),
diff --git a/python/pyiceberg/catalog/hive.py b/python/pyiceberg/catalog/hive.py
index 839fb2a3d5..08655676de 100644
--- a/python/pyiceberg/catalog/hive.py
+++ b/python/pyiceberg/catalog/hive.py
@@ -239,7 +239,7 @@ class HiveCatalog(Catalog):
         file = io.new_input(metadata_location)
         metadata = FromInputFile.table_metadata(file)
         return Table(
-            identifier=(table.dbName, table.tableName),
+            identifier=(self.name, table.dbName, table.tableName),
             metadata=metadata,
             metadata_location=metadata_location,
             io=self._load_file_io(metadata.properties, metadata_location),
diff --git a/python/tests/catalog/integration_test_glue.py b/python/tests/catalog/integration_test_glue.py
index 6f07720b0a..bd025f2a3d 100644
--- a/python/tests/catalog/integration_test_glue.py
+++ b/python/tests/catalog/integration_test_glue.py
@@ -35,6 +35,7 @@ from tests.conftest import clean_up, get_bucket_name, get_s3_path
 
 # The number of tables/databases used in list_table/namespace test
 LIST_TEST_NUMBER = 2
+CATALOG_NAME = "glue"
 
 
 @pytest.fixture(name="glue", scope="module")
@@ -45,7 +46,7 @@ def fixture_glue_client() -> boto3.client:
 @pytest.fixture(name="test_catalog", scope="module")
 def fixture_test_catalog() -> Generator[Catalog, None, None]:
     """The pre- and post-setting of aws integration test."""
-    test_catalog = GlueCatalog("glue", warehouse=get_s3_path(get_bucket_name()))
+    test_catalog = GlueCatalog(CATALOG_NAME, warehouse=get_s3_path(get_bucket_name()))
     yield test_catalog
     clean_up(test_catalog)
 
@@ -57,7 +58,7 @@ def test_create_table(
     test_catalog.create_namespace(database_name)
     test_catalog.create_table(identifier, table_schema_nested, get_s3_path(get_bucket_name(), database_name, table_name))
     table = test_catalog.load_table(identifier)
-    assert table.identifier == identifier
+    assert table.identifier == (CATALOG_NAME,) + identifier
     metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
     s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
 
@@ -78,7 +79,7 @@ def test_create_table_with_default_location(
     test_catalog.create_namespace(database_name)
     test_catalog.create_table(identifier, table_schema_nested)
     table = test_catalog.load_table(identifier)
-    assert table.identifier == identifier
+    assert table.identifier == (CATALOG_NAME,) + identifier
     metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
     s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
 
@@ -125,11 +126,11 @@ def test_rename_table(
     new_table_name = f"rename-{table_name}"
     identifier = (database_name, table_name)
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (CATALOG_NAME,) + identifier
     new_identifier = (new_database_name, new_table_name)
     test_catalog.rename_table(identifier, new_identifier)
     new_table = test_catalog.load_table(new_identifier)
-    assert new_table.identifier == new_identifier
+    assert new_table.identifier == (CATALOG_NAME,) + new_identifier
     assert new_table.metadata_location == table.metadata_location
     metadata_location = new_table.metadata_location.split(get_bucket_name())[1][1:]
     s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
@@ -141,7 +142,7 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na
     identifier = (database_name, table_name)
     test_catalog.create_namespace(database_name)
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (CATALOG_NAME,) + identifier
     test_catalog.drop_table(identifier)
     with pytest.raises(NoSuchTableError):
         test_catalog.load_table(identifier)
@@ -154,7 +155,7 @@ def test_purge_table(
     test_catalog.create_namespace(database_name)
     test_catalog.create_table(identifier, table_schema_nested)
     table = test_catalog.load_table(identifier)
-    assert table.identifier == identifier
+    assert table.identifier == (CATALOG_NAME,) + identifier
     metadata_location = table.metadata_location.split(get_bucket_name())[1][1:]
     s3.head_object(Bucket=get_bucket_name(), Key=metadata_location)
     test_catalog.purge_table(identifier)
diff --git a/python/tests/catalog/test_glue.py b/python/tests/catalog/test_glue.py
index cf2e75c7d3..d217416697 100644
--- a/python/tests/catalog/test_glue.py
+++ b/python/tests/catalog/test_glue.py
@@ -38,11 +38,12 @@ from tests.conftest import BUCKET_NAME, TABLE_METADATA_LOCATION_REGEX
 def test_create_table_with_database_location(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
+    test_catalog = GlueCatalog(catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
     test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db"})
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
 
 
@@ -50,11 +51,14 @@ def test_create_table_with_database_location(
 def test_create_table_with_default_warehouse(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}"})
+    test_catalog = GlueCatalog(
+        catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}"}
+    )
     test_catalog.create_namespace(namespace=database_name)
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
 
 
@@ -62,13 +66,14 @@ def test_create_table_with_default_warehouse(
 def test_create_table_with_given_location(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
+    test_catalog = GlueCatalog(catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
     test_catalog.create_namespace(namespace=database_name)
     table = test_catalog.create_table(
         identifier=identifier, schema=table_schema_nested, location=f"s3://{BUCKET_NAME}/{database_name}.db/{table_name}"
     )
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
 
 
@@ -76,8 +81,9 @@ def test_create_table_with_given_location(
 def test_create_table_with_no_location(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
+    test_catalog = GlueCatalog(catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
     test_catalog.create_namespace(namespace=database_name)
     with pytest.raises(ValueError):
         test_catalog.create_table(identifier=identifier, schema=table_schema_nested)
@@ -87,11 +93,12 @@ def test_create_table_with_no_location(
 def test_create_table_with_strips(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
+    test_catalog = GlueCatalog(catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"})
     test_catalog.create_namespace(namespace=database_name, properties={"location": f"s3://{BUCKET_NAME}/{database_name}.db/"})
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
 
 
@@ -99,11 +106,12 @@ def test_create_table_with_strips(
 def test_create_table_with_strips_bucket_root(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
     test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"})
     test_catalog.create_namespace(namespace=database_name)
     table_strip = test_catalog.create_table(identifier, table_schema_nested)
-    assert table_strip.identifier == identifier
+    assert table_strip.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table_strip.metadata_location)
 
 
@@ -133,12 +141,15 @@ def test_create_duplicated_table(
 def test_load_table(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"})
+    test_catalog = GlueCatalog(
+        catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"}
+    )
     test_catalog.create_namespace(namespace=database_name)
     test_catalog.create_table(identifier, table_schema_nested)
     table = test_catalog.load_table(identifier)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
 
 
@@ -155,12 +166,15 @@ def test_load_non_exist_table(_bucket_initialize: None, _patch_aiobotocore: None
 def test_drop_table(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     identifier = (database_name, table_name)
-    test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"})
+    test_catalog = GlueCatalog(
+        catalog_name, **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"}
+    )
     test_catalog.create_namespace(namespace=database_name)
     test_catalog.create_table(identifier, table_schema_nested)
     table = test_catalog.load_table(identifier)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
     test_catalog.drop_table(identifier)
     with pytest.raises(NoSuchTableError):
@@ -179,17 +193,18 @@ def test_drop_non_exist_table(_bucket_initialize: None, _patch_aiobotocore: None
 def test_rename_table(
     _bucket_initialize: None, _patch_aiobotocore: None, table_schema_nested: Schema, database_name: str, table_name: str
 ) -> None:
+    catalog_name = "glue"
     new_table_name = f"{table_name}_new"
     identifier = (database_name, table_name)
     new_identifier = (database_name, new_table_name)
     test_catalog = GlueCatalog("glue", **{"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO", "warehouse": f"s3://{BUCKET_NAME}/"})
     test_catalog.create_namespace(namespace=database_name)
     table = test_catalog.create_table(identifier, table_schema_nested)
-    assert table.identifier == identifier
+    assert table.identifier == (catalog_name,) + identifier
     assert TABLE_METADATA_LOCATION_REGEX.match(table.metadata_location)
     test_catalog.rename_table(identifier, new_identifier)
     new_table = test_catalog.load_table(new_identifier)
-    assert new_table.identifier == new_identifier
+    assert new_table.identifier == (catalog_name,) + new_identifier
     # the metadata_location should not change
     assert new_table.metadata_location == table.metadata_location
     # old table should be dropped
diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py
index fee3a4f731..23bc0208b6 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -390,7 +390,7 @@ def test_load_table(hive_table: HiveTable) -> None:
         last_sequence_number=34,
     )
 
-    assert table.identifier == ("default", "new_tabl2e")
+    assert table.identifier == (HIVE_CATALOG_NAME, "default", "new_tabl2e")
     assert expected == table.metadata
 
 

Reply via email to