singhpk234 commented on code in PR #1391:
URL: https://github.com/apache/polaris/pull/1391#discussion_r2073766661
##########
regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py:
##########
@@ -1149,6 +1149,78 @@ def test_spark_ctas(snowflake_catalog, polaris_catalog_url, snowman):
         spark.sql(f"drop table {table_name}_t2 PURGE")
[email protected](os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true',
+                    reason='AWS_TEST_ENABLED is not set or is false')
+def test_spark_credentials_s3_exceptions(root_client, snowflake_catalog, polaris_catalog_url,
+                                         snowman, snowman_catalog_client, test_bucket,
+                                         aws_bucket_base_location_prefix):
+ """
+ Create a using Spark. Then call the loadTable api directly with snowman
token to fetch the vended credentials
+ for the first table.
+ Delete the metadata directory and try to access the table using the vended
credentials.
+ It should throw 404 exception
+ :param root_client:
+ :param snowflake_catalog:
+ :param polaris_catalog_url:
+ :param snowman_catalog_client:
+ :param reader_catalog_client:
+ :return:
+ """
+    with IcebergSparkSession(credentials=f'{snowman.principal.client_id}:{snowman.credentials.client_secret}',
+                             catalog_name=snowflake_catalog.name,
+                             polaris_url=polaris_catalog_url) as spark:
+        spark.sql(f'USE {snowflake_catalog.name}')
+        spark.sql('CREATE NAMESPACE db1')
+        spark.sql('CREATE NAMESPACE db1.schema')
+        spark.sql('USE db1.schema')
+        spark.sql('CREATE TABLE iceberg_table (col1 int, col2 string)')
+
+    response = snowman_catalog_client.load_table(snowflake_catalog.name, unquote('db1%1Fschema'),
+                                                 "iceberg_table", "vended-credentials")
+    assert response.config is not None
+    assert 's3.access-key-id' in response.config
+    assert 's3.secret-access-key' in response.config
+    assert 's3.session-token' in response.config
+
+    s3 = boto3.client('s3',
+                      aws_access_key_id=response.config['s3.access-key-id'],
+                      aws_secret_access_key=response.config['s3.secret-access-key'],
+                      aws_session_token=response.config['s3.session-token'])
+
+    objects = s3.list_objects(Bucket=test_bucket, Delimiter='/',
+                              Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table/metadata/')
+    assert objects is not None
+    assert 'Contents' in objects
+    assert len(objects['Contents']) > 0
+
+    metadata_file = next(f for f in objects['Contents'] if f['Key'].endswith('metadata.json'))
+    assert metadata_file is not None
+
+    metadata_contents = s3.get_object(Bucket=test_bucket, Key=metadata_file['Key'])
+    assert metadata_contents is not None
+    assert metadata_contents['ContentLength'] > 0
+
+    s3.delete_objects(Bucket=test_bucket,
+                      Delete={'Objects': [{'Key': obj['Key']} for obj in objects['Contents']]})
Review Comment:
so we only delete the metadata objects? I am assuming deleting just the metadata.json should be enough, or do we need to delete the manifests and the manifest list too, to reproduce?
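
For reference: in a standard Iceberg layout the manifest lists (`snap-*.avro`) and manifests live under the same `metadata/` prefix as the `metadata.json` files, so a prefix-wide delete would cover all three. A minimal sketch of such a delete, assuming the same boto3 client and fixtures as the test (the helper name is hypothetical):

```python
def delete_all_metadata(s3, bucket, metadata_prefix):
    """Delete every object under the table's metadata/ prefix.

    Assumes a standard Iceberg layout, where this removes the
    metadata.json files, manifest lists (snap-*.avro), and manifests
    alike; only data files (under data/) would survive.
    """
    # no Delimiter here, so nested keys under the prefix are listed too
    listing = s3.list_objects(Bucket=bucket, Prefix=metadata_prefix)
    keys = [{'Key': obj['Key']} for obj in listing.get('Contents', [])]
    if keys:
        # delete_objects accepts at most 1000 keys per request
        s3.delete_objects(Bucket=bucket, Delete={'Objects': keys})
    return len(keys)
```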
##########
regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py:
##########
@@ -1149,6 +1149,78 @@ def test_spark_ctas(snowflake_catalog, polaris_catalog_url, snowman):
         spark.sql(f"drop table {table_name}_t2 PURGE")
[email protected](os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true',
+                    reason='AWS_TEST_ENABLED is not set or is false')
+def test_spark_credentials_s3_exceptions(root_client, snowflake_catalog, polaris_catalog_url,
+                                         snowman, snowman_catalog_client, test_bucket,
+                                         aws_bucket_base_location_prefix):
+ """
+ Create a using Spark. Then call the loadTable api directly with snowman
token to fetch the vended credentials
+ for the first table.
+ Delete the metadata directory and try to access the table using the vended
credentials.
+ It should throw 404 exception
+ :param root_client:
+ :param snowflake_catalog:
+ :param polaris_catalog_url:
+ :param snowman_catalog_client:
+ :param reader_catalog_client:
+ :return:
+ """
+    with IcebergSparkSession(credentials=f'{snowman.principal.client_id}:{snowman.credentials.client_secret}',
+                             catalog_name=snowflake_catalog.name,
+                             polaris_url=polaris_catalog_url) as spark:
+        spark.sql(f'USE {snowflake_catalog.name}')
+        spark.sql('CREATE NAMESPACE db1')
+        spark.sql('CREATE NAMESPACE db1.schema')
+        spark.sql('USE db1.schema')
+        spark.sql('CREATE TABLE iceberg_table (col1 int, col2 string)')
+
+    response = snowman_catalog_client.load_table(snowflake_catalog.name, unquote('db1%1Fschema'),
+                                                 "iceberg_table", "vended-credentials")
+    assert response.config is not None
+    assert 's3.access-key-id' in response.config
+    assert 's3.secret-access-key' in response.config
+    assert 's3.session-token' in response.config
+
+    s3 = boto3.client('s3',
+                      aws_access_key_id=response.config['s3.access-key-id'],
+                      aws_secret_access_key=response.config['s3.secret-access-key'],
+                      aws_session_token=response.config['s3.session-token'])
+
+    objects = s3.list_objects(Bucket=test_bucket, Delimiter='/',
+                              Prefix=f'{aws_bucket_base_location_prefix}/snowflake_catalog/db1/schema/iceberg_table/metadata/')
+    assert objects is not None
+    assert 'Contents' in objects
+    assert len(objects['Contents']) > 0
Review Comment:
minor: I would add a small comment here, e.g. "check that the table's metadata is present".
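
e.g., something along these lines (just a sketch of the wording, mirroring the asserts above):

```python
    # sanity check: the table's metadata files should exist in S3 before we delete them
    assert objects is not None
    assert 'Contents' in objects
    assert len(objects['Contents']) > 0
```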
##########
regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py:
##########
@@ -1149,6 +1149,78 @@ def test_spark_ctas(snowflake_catalog, polaris_catalog_url, snowman):
         spark.sql(f"drop table {table_name}_t2 PURGE")
[email protected](os.environ.get('AWS_TEST_ENABLED', 'False').lower() != 'true',
+                    reason='AWS_TEST_ENABLED is not set or is false')
+def test_spark_credentials_s3_exceptions(root_client, snowflake_catalog, polaris_catalog_url,
Review Comment:
can we have a more descriptive name? Suggestion:
```suggestion
def test_s3_exception_on_deleted_table_via_spark(root_client, snowflake_catalog, polaris_catalog_url,
```