This is an automated email from the ASF dual-hosted git repository. dhuo pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push: new 22e4c6861 Add a regression test for Catalog Federation (#2286) 22e4c6861 is described below commit 22e4c68613639dd756a19d0431b37aeaf03cda7b Author: Pooja Nilangekar <nilangekar.po...@gmail.com> AuthorDate: Thu Aug 14 14:58:08 2025 -0400 Add a regression test for Catalog Federation (#2286) * Add a regression test for Catalog Federation * Install jq dependency * Fix token issues * Update regtests/README.md Co-authored-by: Eric Maynard <emayn...@apache.org> * Update README.md --------- Co-authored-by: Eric Maynard <emayn...@apache.org> --- regtests/Dockerfile | 2 +- regtests/README.md | 10 +- regtests/docker-compose.yml | 2 + .../ref/catalog_federation.sh.ref | 61 +++++++ .../t_catalog_federation/src/catalog_federation.sh | 196 +++++++++++++++++++++ 5 files changed, 269 insertions(+), 2 deletions(-) diff --git a/regtests/Dockerfile b/regtests/Dockerfile index 8e19979e3..983f92f87 100644 --- a/regtests/Dockerfile +++ b/regtests/Dockerfile @@ -25,7 +25,7 @@ ENV LANGUAGE='en_US:en' USER root RUN apt update -RUN apt-get install -y diffutils wget curl python3.10-venv +RUN apt-get install -y diffutils wget curl python3.10-venv jq RUN mkdir -p /home/spark && \ chown -R spark /home/spark && \ mkdir -p /tmp/polaris-regtests && \ diff --git a/regtests/README.md b/regtests/README.md index e36cbeb08..7a51160b8 100644 --- a/regtests/README.md +++ b/regtests/README.md @@ -85,6 +85,14 @@ project, just run: env POLARIS_HOST=localhost ./regtests/run.sh ``` +The catalog federation tests rely on the following configurations in `application.properties` to +be set in order to succeed. + +``` +polaris.features."ENABLE_CATALOG_FEDERATION"=true +polaris.features."ALLOW_OVERLAPPING_CATALOG_URLS"=true +``` + To run the tests in verbose mode, with test stdout printing to console, set the `VERBOSE` environment variable to `1`; you can also choose to run only a subset of tests by specifying the test directories as arguments to `run.sh`. For example, to run only the `t_spark_sql` tests in @@ -208,4 +216,4 @@ and download all of the test dependencies into it. From here, `run.sh` will be a To debug, setup IntelliJ to point at your virtual environment to find your test dependencies (see https://www.jetbrains.com/help/idea/configuring-python-sdk.html). Then run the test in your IDE. -The above is handled automatically when running reg tests from the docker image. \ No newline at end of file +The above is handled automatically when running reg tests from the docker image. diff --git a/regtests/docker-compose.yml b/regtests/docker-compose.yml index 6da159ce4..fb118fdea 100644 --- a/regtests/docker-compose.yml +++ b/regtests/docker-compose.yml @@ -37,6 +37,8 @@ services: polaris.features."DROP_WITH_PURGE_ENABLED": "true" polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true" polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": "[\"FILE\",\"S3\",\"GCS\",\"AZURE\"]" + polaris.features."ALLOW_OVERLAPPING_CATALOG_URLS": "true" + polaris.features."ENABLE_CATALOG_FEDERATION": "true" polaris.readiness.ignore-severe-issues: "true" volumes: - ./credentials:/tmp/credentials/ diff --git a/regtests/t_catalog_federation/ref/catalog_federation.sh.ref b/regtests/t_catalog_federation/ref/catalog_federation.sh.ref new file mode 100644 index 000000000..7e61c2092 --- /dev/null +++ b/regtests/t_catalog_federation/ref/catalog_federation.sh.ref @@ -0,0 +1,61 @@ +=== Setting up Catalog Federation Test === +Creating new principal... +Creating local catalog... +Create local catalog response code: 201 +Setting up permissions... +Grant TABLE_WRITE_DATA to catalog_admin response code: 201 +Assign catalog_admin to service_admin response code: 201 +Assign service_admin to new-user response code: 201 +Creating external catalog (passthrough facade)... +Create external catalog response code: 201 +Setting up permissions for external catalog... +Grant TABLE_WRITE_DATA to external catalog_admin response code: 201 +Assign catalog_admin to service_admin for external catalog response code: 201 +Catalogs created successfully + +=== Starting federation test === +=== Creating data via LOCAL catalog === +spark-sql ()> use polaris; +spark-sql ()> create namespace if not exists ns1; +spark-sql ()> create table if not exists ns1.test_table (id int, name string); +spark-sql ()> insert into ns1.test_table values (1, 'Alice'); +spark-sql ()> insert into ns1.test_table values (2, 'Bob'); +spark-sql ()> create namespace if not exists ns2; +spark-sql ()> create table if not exists ns2.test_table (id int, name string); +spark-sql ()> insert into ns2.test_table values (1, 'Apache Spark'); +spark-sql ()> insert into ns2.test_table values (2, 'Apache Iceberg'); +spark-sql ()> +=== Accessing data via EXTERNAL catalog === +spark-sql ()> use polaris; +spark-sql ()> show namespaces; +ns1 +ns2 +spark-sql ()> select * from ns1.test_table order by id; +1 Alice +2 Bob +spark-sql ()> insert into ns1.test_table values (3, 'Charlie'); +spark-sql ()> select * from ns2.test_table order by id; +1 Apache Spark +2 Apache Iceberg +spark-sql ()> insert into ns2.test_table values (3, 'Apache Polaris'); +spark-sql ()> +=== Verifying federation via LOCAL catalog === +spark-sql ()> use polaris; +spark-sql ()> select * from ns1.test_table order by id; +1 Alice +2 Bob +3 Charlie +spark-sql ()> select * from ns2.test_table order by id; +1 Apache Spark +2 Apache Iceberg +3 Apache Polaris +spark-sql ()> drop table ns1.test_table; +spark-sql ()> drop table ns2.test_table; +spark-sql ()> drop namespace ns1; +spark-sql ()> drop namespace ns2; +spark-sql ()> +=== Cleaning up catalogs and principal === +Delete external catalog response code: 204 +Delete local catalog response code: 204 +Delete principal response code: 204 +Catalog federation test completed successfully! diff --git a/regtests/t_catalog_federation/src/catalog_federation.sh b/regtests/t_catalog_federation/src/catalog_federation.sh new file mode 100755 index 000000000..a30786503 --- /dev/null +++ b/regtests/t_catalog_federation/src/catalog_federation.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# This test creates an INTERNAL catalog and an EXTERNAL catalog with passthrough facade +# to demonstrate true catalog federation. + +set -e + + +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" + +echo "=== Setting up Catalog Federation Test ===" + +# Step 1: Create a new principal +echo "Creating new principal..." +PRINCIPAL_RESPONSE=$(curl -s -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals \ + -d '{ + "principal": { + "name": "new-user" + } + }') + +NEW_CLIENT_ID=$(echo "$PRINCIPAL_RESPONSE" | jq -r '.credentials.clientId') +NEW_CLIENT_SECRET=$(echo "$PRINCIPAL_RESPONSE" | jq -r '.credentials.clientSecret') + +# Step 2: Create local catalog +echo "Creating local catalog..." +RESPONSE_CODE=$(curl -s -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d '{ + "type": "INTERNAL", + "name": "test-catalog-local", + "properties": { + "default-base-location": "file:///tmp/warehouse" + }, + "storageConfigInfo": { + "storageType": "FILE", + "allowedLocations": ["file:///tmp/warehouse"] + } + }' \ + --write-out "%{http_code}") +echo "Create local catalog response code: $RESPONSE_CODE" + + + +# Step 3: Grant permissions +echo "Setting up permissions..." + +# Grant TABLE_WRITE_DATA privilege to catalog_admin for local catalog +RESPONSE_CODE=$(curl -s -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-local/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' \ + --write-out "%{http_code}") +echo "Grant TABLE_WRITE_DATA to catalog_admin response code: $RESPONSE_CODE" + +# Assign catalog_admin to service_admin +RESPONSE_CODE=$(curl -s -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/test-catalog-local \ + -d '{"name": "catalog_admin"}' \ + --write-out "%{http_code}") +echo "Assign catalog_admin to service_admin response code: $RESPONSE_CODE" + +# Assign service_admin to new-user +RESPONSE_CODE=$(curl -s -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals/new-user/principal-roles \ + -d '{"name": "service_admin"}' \ + --write-out "%{http_code}") +echo "Assign service_admin to new-user response code: $RESPONSE_CODE" + +# Step 4: Create external catalog +echo "Creating external catalog (passthrough facade)..." +RESPONSE_CODE=$(curl -s -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d "{ + \"type\": \"EXTERNAL\", + \"name\": \"test-catalog-external\", + \"connectionConfigInfo\": { + \"connectionType\": \"ICEBERG_REST\", + \"uri\": \"http://${POLARIS_HOST:-localhost}:8181/api/catalog\", + \"remoteCatalogName\": \"test-catalog-local\", + \"authenticationParameters\": { + \"authenticationType\": \"OAUTH\", + \"tokenUri\": \"http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens\", + \"clientId\": \"${NEW_CLIENT_ID}\", + \"clientSecret\": \"${NEW_CLIENT_SECRET}\", + \"scopes\": [\"PRINCIPAL_ROLE:ALL\"] + } + }, + \"properties\": { + \"default-base-location\": \"file:///tmp/warehouse\" + }, + \"storageConfigInfo\": { + \"storageType\": \"FILE\", + \"allowedLocations\": [\"file:///tmp/warehouse\"] + } + }" \ + --write-out "%{http_code}") +echo "Create external catalog response code: $RESPONSE_CODE" + +# Step 5: Grant permissions for external catalog +echo "Setting up permissions for external catalog..." + +# Grant TABLE_WRITE_DATA privilege to catalog_admin role for test-catalog-external +RESPONSE_CODE=$(curl -s -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-external/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' \ + --write-out "%{http_code}") +echo "Grant TABLE_WRITE_DATA to external catalog_admin response code: $RESPONSE_CODE" + +# Assign catalog_admin role to service_admin principal-role for test-catalog-external +RESPONSE_CODE=$(curl -s -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/test-catalog-external \ + -d '{"name": "catalog_admin"}' \ + --write-out "%{http_code}") +echo "Assign catalog_admin to service_admin for external catalog response code: $RESPONSE_CODE" + +echo "Catalogs created successfully" + +echo "" +echo "=== Starting federation test ===" + +# Test data operations via local catalog +echo "=== Creating data via LOCAL catalog ===" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=test-catalog-local --conf spark.sql.defaultCatalog=polaris --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +use polaris; +create namespace if not exists ns1; +create table if not exists ns1.test_table (id int, name string); +insert into ns1.test_table values (1, 'Alice'); +insert into ns1.test_table values (2, 'Bob'); +create namespace if not exists ns2; +create table if not exists ns2.test_table (id int, name string); +insert into ns2.test_table values (1, 'Apache Spark'); +insert into ns2.test_table values (2, 'Apache Iceberg'); +EOF + +echo "" +echo "=== Accessing data via EXTERNAL catalog ===" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=test-catalog-external --conf spark.sql.defaultCatalog=polaris --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +use polaris; +show namespaces; +select * from ns1.test_table order by id; +insert into ns1.test_table values (3, 'Charlie'); +select * from ns2.test_table order by id; +insert into ns2.test_table values (3, 'Apache Polaris'); +EOF + +echo "" +echo "=== Verifying federation via LOCAL catalog ===" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=test-catalog-local --conf spark.sql.defaultCatalog=polaris --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions +use polaris; +select * from ns1.test_table order by id; +select * from ns2.test_table order by id; +drop table ns1.test_table; +drop table ns2.test_table; +drop namespace ns1; +drop namespace ns2; +EOF + +echo "" +echo "=== Cleaning up catalogs and principal ===" +# Clean up catalogs +RESPONSE_CODE=$(curl -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-external \ + --write-out "%{http_code}") +echo "Delete external catalog response code: $RESPONSE_CODE" + +RESPONSE_CODE=$(curl -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-local \ + --write-out "%{http_code}") +echo "Delete local catalog response code: $RESPONSE_CODE" + +# Clean up principal +RESPONSE_CODE=$(curl -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals/new-user \ + --write-out "%{http_code}") +echo "Delete principal response code: $RESPONSE_CODE" + +echo "Catalog federation test completed successfully!" \ No newline at end of file