This is an automated email from the ASF dual-hosted git repository.

russellspitzer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git


The following commit(s) were added to refs/heads/main by this push:
     new adc3e602 Support catalog backed by s3 in run_spark_sql.sh (#199)
adc3e602 is described below

commit adc3e602f37ebf635a268253f5c8d2639c533c67
Author: Yufei Gu <[email protected]>
AuthorDate: Tue Sep 3 07:15:42 2024 -0700

    Support catalog backed by s3 in run_spark_sql.sh (#199)
    
    
    Co-authored-by: Yufei Gu <yufei@apache.org>
---
 regtests/run_spark_sql.sh | 97 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 27 deletions(-)

diff --git a/regtests/run_spark_sql.sh b/regtests/run_spark_sql.sh
index d1d29e76..9e1a53e2 100755
--- a/regtests/run_spark_sql.sh
+++ b/regtests/run_spark_sql.sh
@@ -16,11 +16,32 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
+# -----------------------------------------------------------------------------
+# Purpose: Launch the Spark SQL shell to interact with Polaris.
+# -----------------------------------------------------------------------------
+#
+# Usage:
+#   ./run_spark_sql.sh [S3-location AWS-IAM-role]
 #
+# Description:
+#   - Without arguments: Runs against a catalog backed by the local filesystem.
+#   - With two arguments: Runs against a catalog backed by AWS S3.
+#       - [S3-location]  - The S3 path to use as the default base location for the catalog.
+#       - [AWS-IAM-role] - The AWS IAM role for catalog to assume when accessing the S3 location.
 #
-# Run this to open an interactive spark-sql shell talking to a catalog named "manual_spark"
+# Examples:
+#   - Run against local filesystem:
+#     ./run_spark_sql.sh
 #
-# You must run 'use polaris;' as your first query in the spark-sql shell.
+#   - Run against AWS S3:
+#     ./run_spark_sql.sh s3://my-bucket/path arn:aws:iam::123456789001:role/my-role
+
+if [ $# -ne 0 ] && [ $# -ne 2 ]; then
+  echo "run_spark_sql.sh only accepts 0 or 2 arguments"
+  echo "Usage: ./run_spark_sql.sh [S3-location AWS-IAM-role]"
+  exit 1
+fi
 
 REGTEST_HOME=$(dirname $(realpath $0))
 cd ${REGTEST_HOME}
@@ -36,37 +57,60 @@ fi
 
 
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
 
-# Use local filesystem by default
-curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
-  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
-  -d '{
-        "catalog": {
-          "name": "manual_spark",
-          "type": "INTERNAL",
-          "readOnly": false,
-          "properties": {
-            "default-base-location": "file:///tmp/polaris/"
-          },
-          "storageConfigInfo": {
-            "storageType": "FILE",
-            "allowedLocations": [
-              "file:///tmp"
-            ]
-          }
-        }
-      }'
+if [ $# -eq 0 ]; then
+  # create a catalog backed by the local filesystem
+  curl -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
+       -H 'Accept: application/json' \
+       -H 'Content-Type: application/json' \
+       http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+       -d '{
+             "catalog": {
+               "name": "manual_spark",
+               "type": "INTERNAL",
+               "readOnly": false,
+               "properties": {
+                 "default-base-location": "file:///tmp/polaris/"
+               },
+               "storageConfigInfo": {
+                 "storageType": "FILE",
+                 "allowedLocations": [
+                   "file:///tmp"
+                 ]
+               }
+             }
+           }'
+
+elif [ $# -eq 2 ]; then
+  # create a catalog backed by S3
+  S3_LOCATION=$1
+  AWS_IAM_ROLE=$2
 
-# Use the following instead of below to use s3 instead of local filesystem
-#curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
-#  http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
-#  -d "{\"name\": \"manual_spark\", \"id\": 100, \"type\": \"INTERNAL\", 
\"readOnly\": false, \"properties\": {\"default-base-location\": 
\"s3://${S3_BUCKET}/${USER}/polaris/\"}}"
+  curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
+       -H 'Accept: application/json' \
+       -H 'Content-Type: application/json' \
+       http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \
+       -d "{
+             \"name\": \"manual_spark\",
+             \"id\": 100,
+             \"type\": \"INTERNAL\",
+             \"readOnly\": false,
+             \"properties\": {
+               \"default-base-location\": \"${S3_LOCATION}\"
+             },
+             \"storageConfigInfo\": {
+               \"storageType\": \"S3\",
+               \"allowedLocations\": [\"${S3_LOCATION}/\"],
+               \"roleArn\": \"${AWS_IAM_ROLE}\"
+             }
+           }"
+fi
 
 # Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata
 curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
   
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark/catalog-roles/catalog_admin/grants
 \
   -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr
 
-# For now, also explicitly assign the catalog_admin to the service_admin. Remove once GS fully rolled out for auto-assign.
+# Assign the catalog_admin to the service_admin.
 curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
   
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/manual_spark
 \
   -d '{"name": "catalog_admin"}' > /dev/stderr
@@ -74,7 +118,6 @@ curl -i -X PUT -H "Authorization: Bearer 
${SPARK_BEARER_TOKEN}" -H 'Accept: appl
 curl -X GET -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: 
application/json' -H 'Content-Type: application/json' \
   
http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/manual_spark
 
-echo ${SPARK_HOME}/bin/spark-sql -S --conf 
spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}"
 ${SPARK_HOME}/bin/spark-sql -S --conf 
spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" \
   --conf spark.sql.catalog.polaris.warehouse=manual_spark \
   --conf spark.sql.defaultCatalog=polaris \

Reply via email to